---
title: "Replication matierals for 'Public Health Communication Reduces COVID-19 Misinformation Sharing and Boosts Self-Efficacy'"
output:
  html_document:
    toc: true
    toc_float:
      copllapsed: true
    df_print: paged
    toc_depth: 4
    theme: flatly
    code_folding: hide
  html_notebook:
    code_folding: hide
editor_options:
  chunk_output_type: console
---

# Setup
```{r}
#knitr::opts_chunk$set(cache = TRUE)
#knitr::opts_chunk$set(echo = TRUE)
#knitr::opts_chunk$set(include = TRUE)
#knitr::opts_chunk$set(message = TRUE)
#knitr::opts_chunk$set(fig.show = "always")

install.packages("pacman")
library("pacman")
pacman::p_load(
  modelsummary,
  fixest,
  summarytools,
  glue,
  rstatix,
  purrr,
  ggeffects,
  sjPlot,
  sjmisc,
  car,
  ggeffects,
  here,
  haven,
  pwr, # Power analysis
  stats,
  rlang,
  janitor,
  ggplot2,
  dplyr,
  ggthemes,
  margins,
  estimatr,
  tidyverse,
  broom,
  lmtest,
  htmltools,
  gridExtra,
  scales,
  rio,
  stringr,
  readxl,
  kableExtra,
  magick,
  labelled,
  leaflet,
  marginaleffects
)

# Set theme
theme_set(theme_bw())
theme_update(
  panel.spacing = unit(1, "lines"),
  panel.border = element_rect(fill = NA),
  panel.background = element_rect(fill = "white"),
  panel.grid.major = element_blank(),
  panel.grid.minor = element_blank(),
  strip.background = element_rect(fill = "gray90"),
             strip.text.y = element_text( size = 10)) # Attrubute text size)


# Remove scientific notation
options(scipen = 100)

# DROP ZERO FOR SCALE
dropLeadingZero <- function(l){
  str_replace(l, '0(?=.)', '')
}

# CREATE NO_ZERO FUNCTION FOR GGPLOT

no_zero <- function(x) {
  y <- sprintf('%.1f',x)
  y[x > 0 & x < 1] <- sprintf('.%s',x[x > 0 & x < 1]*10)
  y[x == 0] <- '0'
  y[x > -1 & x < 0] <- sprintf('-.%s',x[x > -1 & x < 0]*-10)
  y
}


```

# Data wrangling

## Load packages
```{r}
#install_packages("pacman")

pacman::p_load(tidyverse,
       here,
       readxl,
       haven,
       janitor,
       scales,
       modelsummary,
       labelled,
       pwr)

```



## Pretest
```{r}
dfraw <- read_dta(here::here("pretest_data.dta"))

# Clean names
df_clean <- clean_names(dfraw)

# Rehaping
# step1: First I reshape for each task and the corresponding rating 
df <- df_clean %>% 
  dplyr::select(caseid, gender, profile_age, profile_age2, region, profile_education, profile_education_rc, personal_income, household_income, ft_next, ft19, device_category, tot_time, endtime, duration, weight, accuracypic_order_1:accuracypic_order_44,
                accuracypic1:accuracypic_order_44) %>% 
  pivot_longer(col = c(accuracypic1:accuracypic44),
               names_to = "headline",
               names_prefix = "accuracypic",
               values_to = "rating",
               values_drop_na = TRUE)

# Rename caseid
df <- df %>%
  dplyr::rename(resid = caseid)
#names(df)

# From colum 46-89 (accuracypic_order_), keep only the 13-20th letter
names(df)[17:60] <- str_sub(names(df)[17:60],13,20)

#names(df)
# Rescale headline from 1-4 to 0-1
df$rating <- rescale(as.numeric(df$rating), to = c(0,1))

# Save headline as numeric?

# Turn it into a factor
df$headlineraw <- df$headline
df$headlinefac <- as.factor(df$headlineraw)
df$headline <- factor(df$headline,
                               levels=c(1:44),
                               labels=c("f1.","f2","f3","f4","f5",
                                        "f6","f7","f8","f9","f10", 
                                        "f11","f12","f13","f14","f15",
                                        "f16","f17","f18","f19","f20",
                                        "r1","r2","r3","r4","r5",
                                        "r6","r7","r8","r9","r10",
                                        "r11","r12","r13","r14","r15",
                                        "r16","r17","r18","r19","r20",
                                        "n1","n2","n3","n4"))

df$headline_text <- factor(df$headlinefac, 
                           levels=c(1:44),
                           labels=c(
                             "Kokosolie har historisk kunnet ødelægge vira, herunder coronavirus.",
                             "C-vitamin beskytter mod coronavirus",
                             "Levende orm i mundbind! Smid dem ud!",
                             "Coronavirus: Nordkoreas første bekræftede smittede blev skudt.",
                             "Næsetest skader din hjerne.",
                             "Store mængder C-vitamin sænker dødeligheden og alvorlig sygdom for coronavirus.",
                             "Vaccinerede i Israel har 40 gange mere dødelighed - israelske forskere taler om en ny Holocaust",
                             "Vaccineudvikler anbefaler naturlig helbredelse i stedet for vaccination.",
                             "Verdenssundhedsorganisationen (WHO) bekræfter ved et ”uheld”, at Covid-19 IKKE er mere farlig end influenza.",
                             "Covid-19 er planlagt! Millioner af Covid-19 testsæt blev solgt i 2017 og 2018!", 
                             "50 procent af coronapatienterne på sygehuse kommer fra arabiske lande.",
                             "Sygehuse modtager bagudrettet 'provision', hvis Covid-19 angives som dødsårsag.",
                             "Skandale på plejehjem. 8 ud af 31 døde efter tvungen BioNTech/Pfizer vaccine.",
                             "Bill Gates udnytter massevaccinationer til hemmelig plan.",
                             "Johns Hopkins University bekræfter: Du kan blive vaccineret med en PCR-test uden at vide det.",
                             "920 kvinder mister deres ufødte børn efter vaccination",
                             "The Lancet afslører bedrag: Covid-19 Vaccinernes effektivitet er på 1.2% IKKE 90–95%",
                             "FN’s sundhedseksperter indrømmer, at giftige vaccineingredienser skader børn over hele verden",
                             "Pfizer-dokument bekræfter: Kontakt med vaccinerede fører til menstruationssmerter og abort",
                             "Dødstallet fra vaccinerne overstiger nu dødstallet fra covid-19",
                             "Danmark sætter AstraZeneca-vaccine på pause: Mistanke om bivirkninger med blodpropper",
                             "Andet stik af vaccinen mod corona giver flere bivirkninger: Fire procent oplever bivirkninger efter første vaccination mod Covid-19",
                             "Forskere: Problematisk at studiet bag AstraZeneca-beslutning ikke er offentligt tilgængeligt",
                             "Hver fjerde med corona er ikkevestlig indvandrer eller efterkommer",
                             "Danmark og Sverige gik hver sin vej, da coronakrisen ramte - sådan er det gået",
                             "Tredje sag med omdiskuteret corona-paragraf: Risikerer dobbeltstraf for at kaste fyrværkeri",
                             "Ny undersøgelse: Corona-nedlukning og hjemmearbejde giver dårlig trivsel blandt medarbejderne",
                             "Tre tilskuere testet positiv med variant - Heunicke opfordrer 4000 til at blive testet",
                             "Sundhedsprofessor: Vi bruger »grotesk« mange penge på coronaen",
                             "9 af 10 børn har ikke rørt sig nok under corona. Vinterens nedlukning har ført til inaktivitet hos børn og unge, viser undersøgelse.",
                             "Tysk epidemiolog nedtoner vaccinerisiko: Corona er meget værre",
                             "Norsk sundhedsansat er død efter blodprop efter vaccine. Det undersøges, om der er en sammenhæng mellem AstraZeneca …",
                             "Studie: Folk med lav virusmængde smitter mere end antaget",
                             "I dag bliver den første danske kommune færdigvaccineret",
                             "Corona har kostet dyrt: Tivoli får underskud på 143 millioner kroner",
                             "Corona skubber flere ud i ekstrem fattigdom: Nye pandemier kan få nemmere fat",
                             "Så mange danskere har taget på under coronakrisen: ”Det er et kæmpe eksperiment med vores sundhed”",
                             "Sundhedsministeriet afviste at støtte lovende dansk vaccine trods anbefaling",
                             "Psykologformand: De mentale følgevirkninger af corona vil sidde i os og samfundet i lang tid",
                             "Åndenød og huller i hukommelsen: Patienter står i kø med senfølger efter corona",
                             "Influencer anmeldt for skjult reklame for tøj og tandbehandlinger",
                             "Unge føler mere stress, ensomhed og nedtrykthed - skyldes det sociale medier?",
                             "Matador genudsendes: Ser Lise Nørgaard mon selv serien?",
                             "Forskere opdager forbløffende mange supertunge stjerner"))


df <- df %>% 
  mutate(headline3 = case_when(str_detect(headline, "^f") ~ "fake",
                              str_detect(headline, "^r") ~ "real",
                              str_detect(headline, "^n") ~ "non"
                              ))




# attempt to str_remove
df$order_1 <- str_remove(df$order_1, "Headline ")
df$order_2 <- str_remove(df$order_2, "Headline ")
df$order_3 <- str_remove(df$order_3, "Headline ")
df$order_4 <- str_remove(df$order_4, "Headline ")
df$order_5 <- str_remove(df$order_5, "Headline ")
df$order_6 <- str_remove(df$order_6, "Headline ")
df$order_7 <- str_remove(df$order_7, "Headline ")
df$order_8 <- str_remove(df$order_8, "Headline ")
df$order_9 <- str_remove(df$order_9, "Headline ")
df$order_10 <- str_remove(df$order_10, "Headline ")
df$order_11 <- str_remove(df$order_11, "Headline ")
df$order_12 <- str_remove(df$order_12, "Headline ")
df$order_13 <- str_remove(df$order_13, "Headline ")
df$order_14 <- str_remove(df$order_14, "Headline ")
df$order_15 <- str_remove(df$order_15, "Headline ")
df$order_16 <- str_remove(df$order_16, "Headline ")
df$order_17 <- str_remove(df$order_17, "Headline ")
df$order_18 <- str_remove(df$order_18, "Headline ")
df$order_19 <- str_remove(df$order_19, "Headline ")
df$order_20 <- str_remove(df$order_20, "Headline ")
df$order_21 <- str_remove(df$order_21, "Headline ")
df$order_22 <- str_remove(df$order_22, "Headline ")
df$order_23 <- str_remove(df$order_23, "Headline ")
df$order_24 <- str_remove(df$order_24, "Headline ")
df$order_25 <- str_remove(df$order_25, "Headline ")
df$order_26 <- str_remove(df$order_26, "Headline ")
df$order_27 <- str_remove(df$order_27, "Headline ")
df$order_28 <- str_remove(df$order_28, "Headline ")
df$order_29 <- str_remove(df$order_29, "Headline ")
df$order_30 <- str_remove(df$order_30, "Headline ")
df$order_31 <- str_remove(df$order_31, "Headline ")
df$order_32 <- str_remove(df$order_32, "Headline ")
df$order_33 <- str_remove(df$order_33, "Headline ")
df$order_34 <- str_remove(df$order_34, "Headline ")
df$order_35 <- str_remove(df$order_35, "Headline ")
df$order_36 <- str_remove(df$order_36, "Headline ")
df$order_37 <- str_remove(df$order_37, "Headline ")
df$order_38 <- str_remove(df$order_38, "Headline ")
df$order_39 <- str_remove(df$order_39, "Headline ")
df$order_40 <- str_remove(df$order_40, "Headline ")
df$order_41 <- str_remove(df$order_41, "Headline ")
df$order_42 <- str_remove(df$order_42, "Headline ")
df$order_43 <- str_remove(df$order_43, "Headline ")
df$order_44 <- str_remove(df$order_44, "Headline ")


# Create order variable where headline = order position
df <- df %>% 
  mutate(order = case_when(
    headline == order_1 ~ 1,    headline == order_2 ~ 2,
    headline == order_3 ~ 3,    headline == order_4 ~ 4,
    headline == order_5 ~ 5,    headline == order_6 ~ 6,
    headline == order_7 ~ 7,    headline == order_8 ~ 8,
    headline == order_9 ~ 9,    headline == order_10 ~ 10,
    headline == order_11 ~ 11,    headline == order_12 ~ 12,
    headline == order_13 ~ 13,    headline == order_14 ~ 14,
    headline == order_15 ~ 15,    headline == order_16 ~ 16,
    headline == order_17 ~ 17,    headline == order_18 ~ 18,
    headline == order_19 ~ 19,    headline == order_20 ~ 20,
    headline == order_21 ~ 21,    headline == order_22 ~ 22,
    headline == order_23 ~ 23,    headline == order_24 ~ 24,
    headline == order_25 ~ 25,    headline == order_26 ~ 26,
    headline == order_27 ~ 27,    headline == order_28 ~ 28,
    headline == order_29 ~ 29,    headline == order_30 ~ 30,
    headline == order_31 ~ 31,    headline == order_32 ~ 32,
    headline == order_33 ~ 33,    headline == order_34 ~ 34,
    headline == order_35 ~ 35,    headline == order_36 ~ 36,
    headline == order_37 ~ 37,    headline == order_38 ~ 38,
    headline == order_39 ~ 39,    headline == order_40 ~ 40,
    headline == order_41 ~ 41,    headline == order_42 ~ 42,
    headline == order_43 ~ 43,    headline == order_44 ~ 44)
    )


# Save dataset
wave_pretest <- df %>% 
  dplyr::select(resid, rating, headline, headline_text, order, headline3)


save(wave_pretest, file = here::here("wave_pretest.Rdata"))
# save(dataobjectname, file = here::here("foldername", "namefile"))


```

## Wave 1
```{r }
dfraw <- read_dta(here::here("wave1.dta"))

# Clean names
df_clean <- clean_names(dfraw)
df <- df_clean

# Rename caseid
df <- df %>%
  dplyr::rename(resid = caseid)
  

df$wave <- 1

############### RECODINGS
# some_user_1:8
df %>% 
  dplyr::select(some_user_1,
                some_user_2,
                some_user_3,
                some_user_4,
                some_user_5,
                some_user_6,
                some_user_7,
                some_user_8) %>% 
  datasummary_skim()

names(df)[names(df) == "some_user_1"] <- "facebook_user"


# intention_share_new_1:9(other)

# screener1_1:9(other)

df$screener1_other = tolower(df$screener1_other) # Convert values in column to lowercase
df$screen_nyheder <-
  ifelse(grepl("nyheder", df$screener1_other), 1,
         ifelse(grepl("nyhedwr", df$screener1_other), 1,
                ifelse(grepl("nyhedder", df$screener1_other), 1,
                       ifelse(grepl("nydeder", df$screener1_other), 1, 0))))  # If screener contains correct value = 1. # We include misspellings, but not "nyhederne" as it refers to an actual program

df$screen_nyheder <- factor(df$screen_nyheder, 
                     levels = c(0, 
                                1),
                     labels = c("Fail",
                                "Pass"))



########## acc_importance ---------------------------------
df <- df %>% 
  mutate(acc_importance = factor(acc_importance,
                                 levels = c(1,2,3,4),
                                 labels = c("Slet ikke",
                                            "I mindre grad",
                                            "I nogen grad",
                                            "I høj grad")))

#### Recode vaccine intention to a yes/no vaccine intention -----------------
df <- df %>% 
  mutate(vaccine_intention2 = factor(vaccine_intention,
                                 levels = c(1,2,3,4,5,6),
                                 labels = c("Yes",
                                            "Yes",
                                            "Yes",
                                            "No",
                                            "Yes",
                                            "No")))
##### trust_inst1_1:5 ------------------------------------------
df$trust_inst1_1[df$trust_inst1_1== 97 ] <- NA
df$trust_inst1_2[df$trust_inst1_2== 97 ] <- NA
df$trust_inst1_3[df$trust_inst1_3== 97 ] <- NA
df$trust_inst1_4[df$trust_inst1_4== 97 ] <- NA
df$trust_inst1_5[df$trust_inst1_5== 97 ] <- NA

# Change to intuitive names
names(df)[names(df) == "trust_inst1_1"] <- "trust_health"
names(df)[names(df) == "trust_inst1_2"] <- "trust_police"
names(df)[names(df) == "trust_inst1_3"] <- "trust_media"
names(df)[names(df) == "trust_inst1_4"] <- "trust_science"
names(df)[names(df) == "trust_inst1_5"] <- "trust_govern"

#p <- ggplot(data = df,
#            mapping = aes(x = trust_govern))
#p + geom_histogram()

df <- df %>% 
  mutate(index_trust = (trust_health+trust_police+trust_media+trust_science+trust_govern)/5)

# Test alpha
alpha_trust <- data.frame(df[, c("trust_health",
                  "trust_police",
                  "trust_media",
                  "trust_science",
                  "trust_govern")]) %>%  # assign columns to a dataframe "scale1"
  psych::alpha() # calculate cronbachs alpha

# Rescale and relabel - health
df$trust_health <- rescale(as.numeric(df$trust_health))
val_labels(df$trust_health) <- c("Slet ingen tillid" = 0, "Fuld tillid" = 1)

# Rescale and relabel - police
df$trust_police <- rescale(as.numeric(df$trust_police))
val_labels(df$trust_police) <- c("Slet ingen tillid" = 0, "Fuld tillid" = 1)

# Rescale and relabel - media
df$trust_media <- rescale(as.numeric(df$trust_media))
val_labels(df$trust_media) <- c("Slet ingen tillid" = 0, "Fuld tillid" = 1)

# Rescale and relabel - science
df$trust_science <- as.numeric(df$trust_science)
df$trust_science <- rescale(df$trust_science)
val_labels(df$trust_science) <- c("Slet ingen tillid" = 0, "Fuld tillid" = 1)

# Rescale and relabel
df$trust_govern <- rescale(as.numeric(df$trust_govern))
val_labels(df$trust_govern) <- c("Slet ingen tillid" = 0, "Fuld tillid" = 1)

df$index_trust <- rescale(as.numeric(df$index_trust))
val_labels(df$index_trust) <- c("Slet ingen tillid" = 0, "Fuld tillid" = 1)


####### polknow1_1:11 --------------------------------------

# polkno23, polknow3, polknow4



###################################################################################################
#################                   INDEX     ###################################################
###################################################################################################

# social comparison -----------------------------------------------------

# Create index for attention to social comparison: socialcompar_1:13
df$socialcompar_13rev <- dplyr::recode(df$socialcompar_13, 
       `1`=7, 
       `2`=6, 
       `3`=5, 
       `5`=3, 
       `6`=2, 
       `7`=1)

df <- df %>% 
  mutate(index_socialcom = (socialcompar_1+
                      socialcompar_2+
                      socialcompar_3+
                      socialcompar_4+
                      socialcompar_5+
                      socialcompar_6+
                      socialcompar_7+
                      socialcompar_8+
                      socialcompar_9+
                      socialcompar_10+
                      socialcompar_11+
                      socialcompar_12+
                      socialcompar_13rev))


df$index_socialcom <- rescale(as.numeric(df$index_socialcom, to = c(0,1)))  # Rescale to 0-1

socialcom_label <- c("Low social attention" = 0,
                 "High social attention" = 1)
val_labels(df$index_socialcom) <- socialcom_label

p <- ggplot(data = df,
            mapping = aes(x = index_socialcom))
p + geom_histogram(bins =30)

# Test alpha 
alpha_socialcom <- data.frame(df[ ,c("socialcompar_1","socialcompar_2","socialcompar_3",
                      "socialcompar_4",
                      "socialcompar_5",
                      "socialcompar_6",
                      "socialcompar_7",
                      "socialcompar_8",
                      "socialcompar_9",
                      "socialcompar_10",
                      "socialcompar_11",
                      "socialcompar_12",
                      "socialcompar_13rev")]) %>%  # assign columns to a dataframe "scale1"
  psych::alpha() # calculate cronachs alpha

# crt  -----------------------------------------------------

# Recode crt in right and wrong answers and divde by three. crt1:3
# right answers
# 1: 5 kroner
# 2: 5minutter
# 3: 47 dage

df <- df %>%  
  mutate(crt1_true = 
           case_when(
             crt1 == 5 ~ 1,
             crt1 != 5 ~ 0)) %>% 
  mutate(crt2_true = 
           case_when(
             crt2 == 5 ~ 1,
             crt2 != 5 ~ 0)) %>% 
  mutate(crt3_true = 
           case_when(
             crt3 == 47 ~ 1,
             crt3 != 47 ~ 0)) %>% 
  mutate(index_crt = (crt1_true+crt2_true+crt3_true)/3)


# Test alpha
alpha_crt <- data.frame(df[, c("crt1_true", "crt2_true", "crt3_true")]) %>%  # assign columns to a dataframe 
  psych::alpha() # calculate cronachs alpha. Alpha = 0.66

# Need for cognition -----------------------------------------------------------------
df <- df %>%
  mutate(across(
    c("needfcog_3",
        "needfcog_4",
        "needfcog_5",
        "needfcog_7",
        "needfcog_8",
        "needfcog_9",
        "needfcog_12",
        "needfcog_16",
        "needfcog_17"),
    ~ case_when(
      .x == 1 ~ 7,
      .x == 2 ~ 6,
      .x == 3 ~ 5,
      .x == 5 ~ 3,
      .x == 6 ~ 2,
      .x == 7 ~ 1,
      TRUE ~ .x  # Keep other values as is
    ),
    .names = "{col}rev"
  ))

df %>% select(needfcog_3, needfcog_3rev)

alpha_nfc <- data.frame(df[, c("needfcog_1",
                    "needfcog_2",
                    "needfcog_3rev",
                    "needfcog_4rev",
                    "needfcog_5rev",
                    "needfcog_6",
                    "needfcog_7rev",
                    "needfcog_8rev",
                    "needfcog_9rev",
                    "needfcog_10",
                    "needfcog_11",
                    "needfcog_12rev",
                    "needfcog_13",
                    "needfcog_14",
                    "needfcog_15",
                    "needfcog_16rev",
                    "needfcog_17rev",
                    "needfcog_18")]) %>% # assign columns to a dataframe
  psych::alpha() # calculate cronachs alpha. Alpha = 0.86

df <- df %>%
  mutate(
    index_nfc = (
      needfcog_1 +
        needfcog_2 +
        needfcog_3rev +
        needfcog_4rev +
        needfcog_5rev +
        needfcog_6 +
        needfcog_7rev +
        needfcog_8rev +
        needfcog_9rev +
        needfcog_10 +
        needfcog_11 +
        needfcog_12rev +
        needfcog_13 +
        needfcog_14 +
        needfcog_15 +
        needfcog_16rev +
        needfcog_17rev +
        needfcog_18)/18)

df$index_nfc <- rescale(df$index_nfc)

#### SAVE DATA WAVE 1 ----
wave1 <- df
save(wave1, file = here::here( "wave1_data.Rdata"))

```

## Wave 2
```{r}
df <- read_dta(here::here("wave2.dta")) %>% 
  clean_names() %>% 
  rename(treatment = splitgroup) 

df$wave = 2
names(df)[136:175] <-gsub("\\_","",names(df)[136:175]) # Remove underscore from sharing_n

df_raw <- df

#Reshape from wide to long using sharing
df <- df_raw %>%
  pivot_longer(col = c(sharing1:sharing40),
               names_to = "headline",
               names_prefix = "sharing",
               values_to = "sharing_all",
               values_drop_na = FALSE) %>%
  dplyr::select(caseid_w2, sharing_all, headline, treatment, m_m_headline_rnd, m_m_headline_rnd_1, -fake_list_to_show_1:-real_list_to_show_20, everything()) %>% 
  mutate(headline = as.numeric(headline)) %>% 
  dplyr::rename(resid = caseid_w1,
                resid2 = caseid_w2)



df <- df %>%  # Create dichotomous "real" variable
  mutate(
    real =
      case_when(headline <= 20 ~ 0,
                headline > 20 ~ 1),
    treatment = factor(
      treatment,
      levels = c(1, 2, 3, 4),
      labels = c("Control",
                 "Nudge",
                 "Short",
                 "Long")
    ),
    real = factor(
      real,
      levels = c(0, 1),
      labels = c("fake",
                 "real")
    )
  )


# Create a sharing_fake DV
df <- df %>% 
  mutate(sharing_fake = case_when(
    real == "fake" & sharing_all == 1 ~ 1,
    real == "fake" & sharing_all == 2 ~ 2,
    real == "fake" & sharing_all == 3 ~ 3,
    real == "fake" & sharing_all == 4 ~ 4,
    real == "fake" & sharing_all == 5 ~ 5,
    real == "fake" & sharing_all == 6 ~ 6))

# Create a sharing_real DV
df <- df %>% 
  mutate(sharing_real = case_when(
    real == "real" & sharing_all == 1 ~ 1,
    real == "real" & sharing_all == 2 ~ 2,
    real == "real" & sharing_all == 3 ~ 3,
    real == "real" & sharing_all == 4 ~ 4,
    real == "real" & sharing_all == 5 ~ 5,
    real == "real" & sharing_all == 6 ~ 6))


# Rescale headline from 1-4 to 0-1
df = df %>% 
  mutate(across(c("sharing_all","sharing_fake","sharing_real"),
                ~ rescale(as.numeric(.x))))

dflong <- df
dflong <- dflong %>% 
  dplyr::select(resid, sharing_all, treatment, real, headline)

df %>% 
  group_by(real) %>% 
        summarise(mean = mean(sharing_all, na.rm = T))

# BACKGROUND VARIABLES ----------------------------------------
##### gender -----------------------------------------------------
names(df)[names(df) == "gender"] <- "female"
df$female[df$female== 2 ] <- 0
val_labels(df$female) <- c("male" = 0, "female" = 1)
var_label(df$female) <- "Gender"

# profile_age1 -----------------------------------------------------
df$age4 <- factor(df$profile_age1, # country
                  levels=c(1,2,3,4),
                  labels=c("18-34",
                           "35-54",
                           "55-69",
                           "70+"))
var_label(df$age4) <- "Age: 4 categories"

# profile_age2 -----------------------------------------------------
names(df)[names(df) == "profile_age2"] <- "age14"
val_labels(df$age14) <- c("18-22" = 1,
                         "23-27" = 2,
                         "28-32" = 3,
                         "33-37" = 4,
                         "38-42" = 5,
                         "43-47" = 6,
                         "48-52" = 7,
                         "53-57" = 8,
                         "58-62" = 9,
                         "63-67" = 10,
                         "68-72" = 11,
                         "73-77" = 12,
                         "78-82" = 13,
                         "83-89" = 14)
var_label(df$age14) <- "Age: 14 categories in five year intervals"

# profile_education -----------------------------------------------------
names(df)[names(df) == "profile_education_rc"] <- "educ3"
# ft_next -----------------------------------------------------
names(df)[names(df) == "ft_next"] <- "vote_next"
var_label(df$vote_next) <- "Who would you vote for in the next parliamentary election?"
df <- df %>% 
  mutate(vote_next2 = 
           case_when(
             vote_next == 1 ~ 0,
             vote_next == 2 ~ 0,
             vote_next == 3 ~ 1,
             vote_next == 4 ~ 0,
             vote_next == 5 ~ 1,
             vote_next == 6 ~ 1,
             vote_next == 7 ~ 1,
             vote_next == 8 ~ 1,
             vote_next == 9 ~ 0,
             vote_next == 16 ~ 0, 
             vote_next == 17 ~ 1,
             vote_next == 18 ~ 1,
             vote_next == 19 ~ 1,
             vote_next == 20 ~ 0))

val_labels(df$vote_next2) <- c("Left-wing" = 0,
                     "Right-wing" = 1)
# ft19 -----------------------------------------------------
names(df)

# Income
names(df)[names(df) == "household_income"] <- "income_hh"
df$income_hh[df$income_hh== 12 ] <- NA
df$income_hh[df$income_hh== 13 ] <- NA

# Attention -  Tidligere så du en video fra sundhedsmyndighederne, der gav 3 spørgsmål, man kan stille sig selv for at spotte forkert information. Hvad var de tre spørgsmål? 

df <- df %>% 
  mutate(advice3 = 
           case_when(
             attention1_1 == 1 & attention1_3 == 1 & attention1_4 == 1 ~ 1,
             attention1_1 == 0 | attention1_3 == 0 | attention1_4 == 0 ~ 0))

df$advice_who <- df$attention1_1
df$advice_howmany <- df$attention1_3
df$advice_wild <- df$attention1_4



# CREATE ORDER VARIABLE
df$headline_helper <- factor(df$headline,
                               levels=c(1:40),
                               labels=c("f1","f2","f3","f4","f5",
                                        "f6","f7","f8","f9","f10", 
                                        "f11","f12","f13","f14","f15",
                                        "f16","f17","f18","f19","f20",
                                        "r1","r2","r3","r4","r5",
                                        "r6","r7","r8","r9","r10",
                                        "r11","r12","r13","r14","r15",
                                        "r16","r17","r18","r19","r20"))


# Create order variable where headline = order position
df <- df %>%
  mutate(
    order = case_when(
      headline_helper == headline_order1 ~ 1,
      headline_helper == headline_order2 ~ 2,
      headline_helper == headline_order3 ~ 3,
      headline_helper == headline_order4 ~ 4,
      headline_helper == headline_order5 ~ 5,
      headline_helper == headline_order6 ~ 6,
      headline_helper == headline_order7 ~ 7,
      headline_helper == headline_order8 ~ 8,
      headline_helper == headline_order9 ~ 9,
      headline_helper == headline_order10 ~ 10,
      headline_helper == headline_order11 ~ 11,
      headline_helper == headline_order12 ~ 12,
      headline_helper == headline_order13 ~ 13,
      headline_helper == headline_order14 ~ 14,
      headline_helper == headline_order15 ~ 15,
      headline_helper == headline_order16 ~ 16,
      headline_helper == headline_order17 ~ 17,
      headline_helper == headline_order18 ~ 18,
      headline_helper == headline_order19 ~ 19,
      headline_helper == headline_order20 ~ 20,
      headline_helper == headline_order21 ~ 21,
      headline_helper == headline_order22 ~ 22,
      headline_helper == headline_order23 ~ 23,
      headline_helper == headline_order24 ~ 24,
      headline_helper == headline_order25 ~ 25,
      headline_helper == headline_order26 ~ 26,
      headline_helper == headline_order27 ~ 27,
      headline_helper == headline_order28 ~ 28,
      headline_helper == headline_order29 ~ 29,
      headline_helper == headline_order30 ~ 30,
      headline_helper == headline_order31 ~ 31,
      headline_helper == headline_order32 ~ 32,
      headline_helper == headline_order33 ~ 33,
      headline_helper == headline_order34 ~ 34,
      headline_helper == headline_order35 ~ 35,
      headline_helper == headline_order36 ~ 36,
      headline_helper == headline_order37 ~ 37,
      headline_helper == headline_order38 ~ 38,
      headline_helper == headline_order39 ~ 39,
      headline_helper == headline_order40 ~ 40
    )
  )

#### SAVE DATA WAVE 2 ----
wave2 <- df
save(wave2, file = here::here( "wave2_data.Rdata"))
# save(dataobjectname, file = here::here("foldername", "namefile"))



```

## Wave 3
```{r}
df <- clean_names(read_dta(here::here("wave3.dta"))) %>% 
  dplyr::rename(treatment = splitgroup) %>% 
  dplyr::rename(resid = caseid) 

df %>% glimpse()


df$wave <- 3
names(df)[121:160] <-gsub("\\_","",names(df)[121:160]) # Remove underscore from sharing_n

df_raw <- df


#Reshape from wide to long using sharing
df <- df_raw %>%
  pivot_longer(col = c(sharing1:sharing40),
               names_to = "headline",
               names_prefix = "sharing",
               values_to = "sharing_all",
               values_drop_na = FALSE) %>%
  dplyr::select(resid, sharing_all, headline, treatment, m_m_headline_rnd, m_m_headline_rnd_1, -fake_list_to_show_1:-real_list_to_show_20, everything() )

df$headline <- as.numeric(df$headline)

df <- df %>%  # Create dichotomous "real" variable
  mutate(real =
           case_when(
             headline <= 20 ~ 0,
             headline > 20 ~ 1))

df$treatment <- factor(df$treatment,
                       levels=c(1,2,3,4),
                       labels=c("Control",
                                "Nudge",
                                "Short",
                                "Long"))

df$real <- factor(df$real,
                  levels=c(0,1),
                  labels =c("fake",
                            "real"))
# Create a sharing_fake DV
df <- df %>% 
  mutate(sharing_fake = case_when(
    real == "fake" & sharing_all == 1 ~ 1,
    real == "fake" & sharing_all == 2 ~ 2,
    real == "fake" & sharing_all == 3 ~ 3,
    real == "fake" & sharing_all == 4 ~ 4,
    real == "fake" & sharing_all == 5 ~ 5,
    real == "fake" & sharing_all == 6 ~ 6))

# Create a sharing_real DV
df <- df %>% 
  mutate(sharing_real = case_when(
    real == "real" & sharing_all == 1 ~ 1,
    real == "real" & sharing_all == 2 ~ 2,
    real == "real" & sharing_all == 3 ~ 3,
    real == "real" & sharing_all == 4 ~ 4,
    real == "real" & sharing_all == 5 ~ 5,
    real == "real" & sharing_all == 6 ~ 6))

# Rescale headline from 1-4 to 0-1
df$sharing_all <- rescale(as.numeric(df$sharing_all))
df$sharing_fake <- rescale(as.numeric(df$sharing_fake))
df$sharing_real <- rescale(as.numeric(df$sharing_real))

df %>% 
  group_by(real) %>% 
  summarise(mean = mean(sharing_all, na.rm = T))

# Protection motivation theory
df <- df %>%
  mutate(
    threat = (bat_pmt_1 + bat_pmt_2) / 2,
    selfeff = (bat_pmt_3 + bat_pmt_4) / 2,
    respeff = (bat_pmt_5 + bat_pmt_6) / 2
  )

# Rescale to 0-1
df$threat <- rescale(df$threat)
df$selfeff <- rescale(df$selfeff)
df$respeff <- rescale(df$respeff)
df$bat_pmt_1 <- rescale(as.numeric(df$bat_pmt_1))
df$bat_pmt_2 <- rescale(as.numeric(df$bat_pmt_2))
df$bat_pmt_3 <- rescale(as.numeric(df$bat_pmt_3))
df$bat_pmt_4 <- rescale(as.numeric(df$bat_pmt_4))
df$bat_pmt_5 <- rescale(as.numeric(df$bat_pmt_5))
df$bat_pmt_6 <- rescale(as.numeric(df$bat_pmt_6))


# BACKGROUND VARIABLES ----------------------------------------
##### gender -----------------------------------------------------
names(df)[names(df) == "gender"] <- "female"
df$female[df$female== 2 ] <- 0
val_labels(df$female) <- c("male" = 0, "female" = 1)
var_label(df$female) <- "Gender"

# profile_age1 -----------------------------------------------------
df$age4 <- factor(df$profile_age1, # country
                  levels=c(1,2,3,4),
                  labels=c("18-34",
                           "35-54",
                           "55-69",
                           "70+"))
var_label(df$age4) <- "Age: 4 categories"

# profile_age2 -----------------------------------------------------
names(df)[names(df) == "profile_age2"] <- "age14"
val_labels(df$age14) <- c("18-22" = 1,
                         "23-27" = 2,
                         "28-32" = 3,
                         "33-37" = 4,
                         "38-42" = 5,
                         "43-47" = 6,
                         "48-52" = 7,
                         "53-57" = 8,
                         "58-62" = 9,
                         "63-67" = 10,
                         "68-72" = 11,
                         "73-77" = 12,
                         "78-82" = 13,
                         "83-89" = 14)
var_label(df$age14) <- "Age: 14 categories in five year intervals"

# profile_education -----------------------------------------------------
names(df)[names(df) == "profile_education_rc"] <- "educ3"
# ft_next -----------------------------------------------------
names(df)[names(df) == "ft_next"] <- "vote_next"
var_label(df$vote_next) <- "Who would you vote for in the next parliamentary election?"
df <- df %>% 
  mutate(vote_next2 = 
           case_when(
             vote_next == 1 ~ 0,
             vote_next == 2 ~ 0,
             vote_next == 3 ~ 1,
             vote_next == 4 ~ 0,
             vote_next == 5 ~ 1,
             vote_next == 6 ~ 1,
             vote_next == 7 ~ 1,
             vote_next == 8 ~ 1,
             vote_next == 9 ~ 0,
             vote_next == 16 ~ 0, 
             vote_next == 17 ~ 1,
             vote_next == 18 ~ 1,
             vote_next == 19 ~ 1,
             vote_next == 20 ~ 0,
             vote_next == 21 ~ 1,
             vote_next == 22 ~ 0))

val_labels(df$vote_next2) <- c("Left-wing" = 0,
                     "Right-wing" = 1)
# ft19 -----------------------------------------------------
names(df)

# Income
names(df)[names(df) == "household_income"] <- "income_hh"
df$income_hh[df$income_hh== 12 ] <- NA
df$income_hh[df$income_hh== 13 ] <- NA




 wave3 <- df
#### SAVE DATA WAVE 2----wave3 <- df
save(wave3, file = here::here( "wave3_data.Rdata")) # save(dataobjectname, file = here::here("foldername", "namefile"))

```


### Wave 3 power
```{r}
pwr.t.test(n=500, power=0.8, sig.level=.05, alternative="greater") # Power analysis
pwr.t.test(n=500, power=0.9, sig.level=.05, alternative="greater") # Power analysis

```

## Merge wave 1+2
```{r}
load(here::here("wave1_data.Rdata")) # Load wave 1
load(here::here("wave2_data.Rdata")) # Load wave 2

df <- merge(wave2, wave1, by = "resid") 


df <- df[!is.na(df$resid2), ] %>%
  select(
    -starts_with("fake_list"),
    -starts_with("real_list"),
    -starts_with("headline_order"),
    -starts_with("headline_show"),
    -starts_with("page"))
    

save(df, file = here::here( "final_data.Rdata"))

```
### Attrition
```{r}
load(here::here("wave1_data.Rdata")) # Load wave 1
load(here::here("wave2_data.Rdata")) # Load wave 2

d = merge(wave2, wave1, by = "resid")

df = d %>%
  pivot_wider(
    id_cols = c(
      resid,
      treatment,
      resid2,
      tot_time.x
    ),
     names_from = headline,
     values_from = sharing_all
  ) 

# ATTRITION
# Count number of distinct respondents in the waves
wave1 %>% distinct(resid) %>% count()
wave2 %>% distinct(resid) %>% count()


# Count any post-treatment attrition
d %>%
  group_by(resid) %>%
  summarise(
    na_count = sum(is.na(sharing_all))
  ) %>% ungroup() %>% pull(na_count) %>%  table()
  

df %>% select(-tot_time.x, ) %>% 
    drop_na() %>% count()
  

    
    
```


# Main text


## Study 1. {.tabset}

### Figure 1: Sharing fake/real <- Treatment 

```{r}
#install.packages("pacman")
pacman::p_load(tidyverse,
               here,
               glue,
               broom,
               purrr,
               modelsummary,
               ggplot2,
               rstatix,
               kableExtra)

load(here::here( "final_data.Rdata")) # Load wave 

# Define vars
dvs <- c("sharing_fake", "sharing_real")
ivs <-  c("treatment")

# Specificy formulas
fs <- crossing(dvs, ivs)  %>%
  mutate(f = glue("{dvs} ~ {ivs}"))

# Apply lm function on formulas, create tidy dataframe 3) filter relevant coefficients
m0 <- fs %>%
  mutate(model = map(f, ~ lm(formula = as.formula(.x),
                             data = df))) 

# Create temporary frame with cluster robust std errors
m_temp = m0 %>%
  pull(model) %>% 
  modelsummary(output = "modelsummary_list",
               vcov =  ~ resid + headline,
               statistic = "conf.int")

# Create empty object
m_long = list(NULL)


# tidy each regression and bind them together
for (i in 1:2){
  m_temp[[i]] = m_temp[[i]] %>% tidy() %>%
    mutate(model = factor(ifelse(i == 1, "False headlines",
                          ifelse(i == 2, "Real headlines",
                                 NA))))
  m_long = rbind(m_long, m_temp[[i]])
}

########################################## 
# Sharing discernment
# calculate mean sharing discernment and collapse by respondent
df_disc <- df %>%
  group_by(resid, treatment) %>%
  summarise(sharing_disc = mean(sharing_real, na.rm = T)-mean(sharing_fake, na.rm = T)) %>% ungroup() 
  

# Define vars
dvs <- c("sharing_disc")
ivs <-  c("treatment")

# Specificy formulas
fs <- crossing(dvs, ivs)  %>%
  mutate(f = glue("{dvs} ~ {ivs}"))

# Apply lm function on formulas, create tidy dataframe 3) filter relevant coefficients
m_wide <- fs %>%
  mutate(model = map(f, ~ lm(formula = as.formula(.x),
                             data = df_disc))) %>%
  pull(model) %>% 
  modelsummary(output = "modelsummary_list",
               vcov =  ~ resid,
               statistic = "conf.int") %>% 
  tidy() %>% 
  mutate(model = factor("Sharing Discernment"))
  

# combine models
m <- rbind(m_long, m_wide) %>% 
  mutate_if(is.numeric, ~round(., 3)) %>% 
  filter(term == "treatmentNudge" |
           term == "treatmentShort" |
           term == "treatmentLong") %>% 
  mutate(
  term = factor(
    term,
    levels = c("treatmentLong",
               "treatmentShort",
               "treatmentNudge"),
    labels = c("3 min video",
               "15 sec video",
               "Accuracy nudge")
  )) %>% 
   mutate_if(is.numeric, ~round(., 3))

# Call in-text coefficients, confidence intervals and p-value 
m %>% 
  select(term, model, estimate, conf.low, conf.high, p.value )

# Create Figure 1 in the main text
p = m %>%
  ggplot(aes(x = estimate,
             y = term)) +
  geom_vline(xintercept = 0) +
  geom_point() +
  geom_errorbarh(
    aes(xmin = conf.low, xmax = conf.high),
    height = 0) +
  scale_x_continuous(
    limits = c(-.11, .11),
    labels = dropLeadingZero)+
  facet_wrap( ~ model, ncol = 3L) +
  xlab("") +
  ylab("")

# Call figure
p

ggsave(
  here::here( "0_study1_main.png"),
  height = 2,
  width = 6)

pdf(here::here( "0_study1_main.pdf"),
    height = 2,
    width = 6)
print(p)
dev.off()


```

### Count participants

```{r eval=FALSE, include=FALSE}
# Model 1: sharing fake

# See number of obseravtions

m0[["model"]][[1]] 
m0[["model"]][[1]][["nclusters"]]

# Model 2: sharing fake
m_temp[[1]]$glance$nobs
m0[["model"]][[2]][["nclusters"]]
```

### Effect size
```{r}
# Calculate cohen's d
df %>%
  mutate(treatment = fct_rev(treatment)) %>% 
  rstatix::cohens_d(sharing_fake ~ treatment, var.equal = TRUE)  %>% 
  mutate_if(is.numeric, ~round(., 3)) %>% 
  filter(group2 == "Control") %>% 
  pull(effsize,group1) 

df %>%
  mutate(treatment = fct_rev(treatment)) %>% 
  rstatix::cohens_d(sharing_real ~ treatment, var.equal = TRUE)  %>% 
  mutate_if(is.numeric, ~round(., 3)) %>% 
  filter(group2 == "Control") %>% 
    pull(effsize,group1) 

# Note here I call the other data frame from sharing discernment
df_disc %>% 
  ungroup() %>% 
  mutate(treatment = fct_rev(treatment)) %>% 
  cohens_d(sharing_disc ~ treatment, var.equal = TRUE) %>%  
  mutate_if(is.numeric, ~round(., 3)) %>% 
  filter(group2 == "Control") %>% 
  pull(effsize,group1)

# Alternative way
df1 = df %>%
  mutate(Treatment = factor(case_when(
    treatment == "Long" ~ "Long",
    treatment == "Control" ~ "Control"))) %>% 
  select(Treatment, sharing_fake) 

# T-test
t.test(sharing_fake ~ Treatment, data = df1)


```


## Study 2 {.tabset}

### Figure 2: PMT <- Treatment 

**Conclusion**: The long intervention increases self-efficacy

```{r}
load(here::here("wave3_data.Rdata")) # Load wave 3

# Widen data: Each participant only answers once
df = wave3 %>%
  pivot_wider(
    id_cols = c(
      resid,
      threat,
      selfeff,
      respeff,
      treatment,
      bat_pmt_1,
      bat_pmt_2,
      bat_pmt_3,
      bat_pmt_4,
      bat_pmt_5,
      bat_pmt_6
    ),
     names_from = headline,
     values_from = sharing_all
  ) 


# Define vars
dvs <- c("threat", # Define you dependent variables
             "selfeff",
            "respeff")
ivs <-  c("treatment")

# Specificy formulas
fs <- crossing(dvs, ivs)  %>%
  mutate(f = glue("{dvs} ~ {ivs}"))

# Apply lm function on formulas, create tidy dataframe 3) filter relevant coefficients
m0 <- fs %>%
  mutate(model = map(f, ~ lm(
    formula = as.formula(.x),
    data = df
  ))) 

# Get output
m = modelsummary(dvnames(m0$model),
               output = "modelsummary_list",
               statistic = "conf.int",
               vcov =  ~ resid) %>% 
  map(tidy) %>%
  map_dfr(bind_rows, .id = "outcome") %>% 
  mutate_if(is.numeric, round, 3) %>% 
  filter(term == "treatmentNudge" |
           term == "treatmentShort" |
           term == "treatmentLong") %>%
  mutate(
    term = factor(
      term,
      levels = c("treatmentLong",
                 "treatmentShort",
                 "treatmentNudge"),
      labels = c("3 min video",
                 "15 sec video",
                 "Accuracy nudge")
    ),
    outcome = factor(outcome,
                     levels = c(
                       "threat",
                       "selfeff",
                       "respeff"),
      labels = c("Threat appraisal",
                 "Self efficacy",
                 "Response efficacy")
    )  )

# Call in-text coefficients, confidence intervals and p-value 
m %>% 
  select(term, outcome, estimate, conf.low, conf.high, p.value )


# Create plot 
p <- ggplot(data = m, aes(x = estimate,
                         y = term,
                         group = outcome))+
  geom_point() +
  geom_errorbarh(
    aes(xmin = conf.low, xmax = conf.high),
    height = 0) +
    scale_x_continuous(
    limits = c(-.1, .1),
    labels = dropLeadingZero)+
  facet_wrap(~ outcome, ncol = 4L) +
  geom_vline(xintercept = 0) +
  xlab("") +
  ylab("")

pdf(here::here( "0_study2_main.pdf"), height = 2, width = 6)
 print(p)
 dev.off()
 
p

 ggsave(
 here::here( "0_study2_main.png"), height = 2, width = 6)

```

#### Count participants

```{r eval=FALSE, include=FALSE}
# Note: There is only one observation per respondent
# Model 1
m0[["model"]][[1]][["nobs"]]
m0[["model"]][[1]][["nclusters"]]

# Model 2
m0[["model"]][[2]][["nobs"]]
m0[["model"]][[2]][["nclusters"]]

# Model 3
m0[["model"]][[3]][["nobs"]]
m0[["model"]][[3]][["nclusters"]]
```

#### Effect size 
```{r}
# Calculate cohen's d

# Threat appraisal
df %>% 
  mutate(treatment = fct_rev(treatment)) %>%
  cohens_d(threat ~ treatment, var.equal = TRUE) %>%
  mutate_if(is.numeric, ~ round(., 3)) %>%
  filter(group2 == "Control") %>% 
  pull(effsize, group1)

# Self-efficacy
df %>% 
  mutate(treatment = fct_rev(treatment)) %>%
  cohens_d(selfeff ~ treatment, var.equal = TRUE) %>%
  mutate_if(is.numeric, ~ round(., 3)) %>%
  filter(group2 == "Control") %>% 
  pull(effsize, group1)

# Response efficacy
df %>% 
  mutate(treatment = fct_rev(treatment)) %>%
  cohens_d(respeff ~ treatment, var.equal = TRUE) %>%
  mutate_if(is.numeric, ~ round(., 3)) %>%
  filter(group2 == "Control") %>% 
  pull(effsize, group1)

```

#### Coefficients: PMT <- Treatment
This is the coefficients for the treatment effect on PMT measures
```{r}
# Get goefficients
m %>% mutate(estimate = round(m$estimate, 2),
                 conf.low = round(m$conf.low, 2),
                 conf.high = round(m$conf.high, 2),
                 p.value = round(m$p.value, 2),
                 ) %>% 
  select(term, estimate, conf.low, conf.high, p.value, outcome)

```

# Appendix 

## Load packages

```{r}
# Install packages, if needed
#remove.packages("rlang")  # Removes rlang
#install.packages("https://cran.r-project.org/src/contrib/Archive/rlang/rlang_1.1.2.tar.gz", repos = NULL, type="source")

#remove.packages("kableExtra")  # Removes rlang
#install.packages(KableExtra)

# rlang needs to be >= 1.1.0
packageVersion("rlang")
packageVersion("kableExtra")
packageVersion("sjPlot")
packageVersion("estimatr")

################################################################
##### Restart R session, to make sure this runs smoothly #######
################################################################

# Use pacman to install all other packages
install.packages("pacman")
library("pacman")
pacman::p_load(modelsummary,
               tidyverse,
               kableExtra,
               glue,
               estimatr,
               rstatix,
               marginaleffects,
               scales,
               fixest,
               sjPlot,
               emmeans)


```


## Section C: Pretest of headlines
```{r}
load(here::here("wave_pretest.Rdata")) # Load wave 3
df <- wave_pretest


# SUMMARIZE
# Use group_by to designate the groups

# find mean for all headline
df <- df %>%
  group_by(headline) %>%
  mutate(mean_rating = mean(rating),
         sd_rating = sd(rating),
         headline3 = factor(headline3, 
                     levels = c("fake", 
                                "real", 
                                "non"),
                     labels = c("False",
                                "Real",
                                "Non-covid")))


# Save plot in object p
p <- ggplot(data=df, aes(x=headline_text, y=mean_rating, color = headline3)) +
  geom_point() +
  coord_flip()+
  guides(color=guide_legend(title="Headline veracity"))+
  labs(y = "Perceived accuracy",
       x = "")+ theme(legend.position="bottom")

# This chuck saves the plot p as pretest.pdf
pdf(here::here( "pretest.pdf"), width = 12) # Save as pdf
print(p) # This code will not plot the figure, but save it as pdf
dev.off() # End save

# Print plot p in Rstudio 
p
```



## Section D: Descriptive statistics


### Study 1: Descriptives dep var across conditions
```{r}
# CALCULATE MEANS AND SD ACROSS VARIABLES IN EXPERIMENTAL CONDITIONS
# load study 1 data
load(here::here( "final_data.Rdata")) # Load wave 

d = df %>%
  group_by(resid, treatment) %>%
  rename(Treatment = treatment) %>%
  summarise(
    `False Sharing` = mean(sharing_fake, na.rm = T),
    `Real Sharing` = mean(sharing_real, na.rm = T),
    `Sharing Discernment` = mean(sharing_real, na.rm = T) - mean(sharing_fake, na.rm = T)
  ) %>%
  ungroup()

# Output table
modelsummary::datasummary(
  Treatment * (`False Sharing` + `Real Sharing` + `Sharing Discernment`) ~
    (Mean + SD + N)  ,
  data = d,
  title = "Summary statistics for dependent variables in Study 1",
  notes = "Sample size: 2232",
#  output = "latex"
  output = here::here("s1_dv_descriptives.tex") # SAVE the table as a tex file in the root folder
)


```

### Study 2: Descriptives dep var across conditions
```{r}
# load study 2 data
load(here::here( "wave3_data.Rdata")) # Load wave 

# Save table as s2_dv_descriptives.tex
d = wave3 %>%
  group_by(resid, treatment) %>%
  rename(Treatment = treatment) %>% 
  summarise(`Threat Appraisal` = mean(threat, na.rm = T),
            `Self Efficacy` = mean(selfeff, na.rm = T),
            `Response Efficacy` = mean(respeff, na.rm = T))

# Output table
modelsummary::datasummary(Treatment * (`Threat Appraisal` + `Self Efficacy` + `Response Efficacy`) ~ (Mean + SD + N),
  data = d,
  title = "Summary statistics for dependent variables in Study 2",
  notes = "Sample size: 2012",
# output = "latex"
 output = here::here(  "s2_dv_descriptives.tex") # Save file
)

```

## Section F: Regression output and additional analyses

### 6.1 Regression output from manuscript

#### Figure 1: Regression output

```{r}

#### Produce sharing real, sharing fake and intervention X veracity ####

# Load data
load(here::here( "final_data.Rdata")) # Load wave 

# Define vars
dvs <- c("sharing_fake", "sharing_real")
ivs <-  c("treatment")

# Specificy formulas
fs <- crossing(dvs, ivs)  %>%
  mutate(f = glue("{dvs} ~ {ivs}")) %>% 
  # Add row with interaction term
  add_row(f = glue("sharing_all ~ {ivs}*real"))

# Apply lm function on formulas, create tidy dataframe 3) filter relevant coefficients
m0 <- fs %>%
  mutate(model = map(f, ~ lm(formula = as.formula(.x),
                             data = df)))

# Edit model names
names(m0$model) <- c("False headline sharing", "Real headline sharing", "All headlines (Intervention X veracity)")

# This edits the labels (not necessary)
cm <- c(
  "treatmentNudge" = "Accuracy nudge (vs. control)",
  "treatmentShort" = "15 sec video (vs. control)",
  "treatmentLong" = "3 min video (vs. control)",
  "realreal" = "Headline veracity (0 = false, 1 = true)",
  "treatmentNudge:realreal" = "Accuracy nudge X veracity",
  "treatmentShort:realreal" = "15 sec video X veracity",
  "treatmentLong:realreal" = "3 min video X veracity",
  '(Intercept)' = 'Constant'
)

# Define what should be shown
gm <- tibble::tribble(
  ~raw,        ~clean,          ~fmt,
  "nobs",      "N",             0,
  "r.squared", "R2", 2,
  "adj.r.squared", "R2 Adj.", 2)

# Create table
m0$model %>%
  modelsummary(vcov =  ~ resid + headline,
               title = "Regression output of figure 1 in main text: Willingness to share headlines. \\label{tab:fig1_regout}",
  coef_map = cm,
  gof_map = gm,
  estimate  = "{estimate} ({std.error}){stars}",
  statistic = NULL,
  stars = c('*' = .05, '**' = .01, '***' = .001),
  output = "latex",
  escape = F
  )  %>% 
  kable_styling(latex_options = c("hold_position",
                                  "scale_down",
                                  notation="none"
                                  ),
                font_size = 7,
                 full_width = FALSE) %>% 
  footnote(glue("'False headline sharing' shows the regression output of the left panel in figure 1 in the main text. 'Real headline sharing' shows the regression output of the middle panel. 'All headlines (Treatment X veracity)' shows a regression where all headlines are included with the interaction of the treatment interventions and the veracity of the headline on sharing intention. Coefficients are OLS estimates. Standard errors (in parentheses) are clustered at the respondent and headline level. All regressions are based on observations from 2232 respondents. * p < 0.05, ** p < 0.01, *** p < 0.001"),
           threeparttable = TRUE) %>%
  save_kable(file = here::here(  "fig1_regoutput.tex"))

#### Sharing discernment ####

# Load data
load(here::here( "final_data.Rdata")) # Load wave 

# calculate mean sharing discernment and collapse by respondent
df_disc <- df %>%
  group_by(resid, treatment) %>%
  summarise(sharing_disc = mean(sharing_real, na.rm = T)-mean(sharing_fake, na.rm = T))


# Define vars
dvs <- c("sharing_disc")
ivs <-  c("treatment")

# Specificy formulas
fs <- crossing(dvs, ivs)  %>%
  mutate(f = glue("{dvs} ~ {ivs}"))


# Apply lm function on formulas, create tidy dataframe 3) filter relevant coefficients
disc_raw <- fs %>%
  mutate(model = map(f, ~ lm(formula = as.formula(.x),
                             data = df_disc)))

# Edit model names
names(disc_raw$model) <- "Sharing discernment"

# This edits the labels (not necessary)
cm <- c(
  "treatmentNudge" = "Accuracy nudge (vs. control)",
  "treatmentShort" = "15 sec video (vs. control)",
  "treatmentLong" = "3 min video (vs. control)",
  '(Intercept)' = 'Constant'
)

# Define what should be shown
gm <- tibble::tribble(
  ~raw,        ~clean,          ~fmt,
  "nobs",      "N",             0,
  "r.squared", "R2", 2,
  "adj.r.squared", "R2 Adj.", 2)

# Create table
disc_raw$model %>%
  modelsummary(vcov =  ~ resid,
               title = "Regression output of figure 1 in main text: Willingness to share headlines. \\label{tab:fig1_regout_disc}",
  coef_map = cm,
  gof_map = gm,
  estimate  = "{estimate} ({std.error}){stars}",
  statistic = NULL,
  stars = c('*' = .05, '**' = .01, '***' = .001),
               output = "latex",
  escape = F)  %>% 
  kable_styling(latex_options = c("hold_position",
                                  #"scale_down",
                                  notation="none"),
                font_size = 7,
                 full_width = FALSE) %>% 
  footnote(glue("'Sharing discernment' shows the regression output of the right panel in figure 1 in the main text. The discernment score is calculated as the respondent level difference between mean real and false headline sharing, that is, sharing real - sharing false. Coefficients are OLS estimates. Standard errors (in parentheses) are clustered at the respondent level. * p < 0.05, ** p < 0.01, *** p < 0.001"),
           threeparttable = TRUE) %>%
  save_kable(file = here::here(  "fig1_regoutput_disc.tex")) # This line saves the table
   
```


#### Figure 2: Regression output
```{r}

load(here::here("wave3_data.Rdata")) # Load wave 3

# Widen data: Each participant only answers once
df = wave3 %>%
  pivot_wider(
    id_cols = c(
      resid,
      threat,
      selfeff,
      respeff,
      treatment,
      bat_pmt_1,
      bat_pmt_2,
      bat_pmt_3,
      bat_pmt_4,
      bat_pmt_5,
      bat_pmt_6
    ),
     names_from = headline,
     values_from = sharing_all
  ) 


# Define vars
dvs <- c("threat", # Define you dependent variables
             "selfeff",
            "respeff")
ivs <-  c("treatment")

# Specificy formulas
fs <- crossing(dvs, ivs)  %>%
  mutate(f = glue("{dvs} ~ {ivs}"))

# Apply lm function on formulas, create tidy dataframe 3) filter relevant coefficients
m0 <- fs %>%
  mutate(model = map(f, ~ lm(
    formula = as.formula(.x),
    data = df
  ))) 

models <- list(
  "Response efficacy" = m0$model[[1]],
  "Self efficacy" = m0$model[[2]],
  "Threat appraisal" = m0$model[[3]]
)


# Edit labels
cm <- c(
  "treatmentNudge" = "Accuracy nudge (vs. control)",
  "treatmentShort" = "15 sec video (vs. control)",
  "treatmentLong" = "3 min video (vs. control)",
  '(Intercept)' = 'Constant'
)

# Define what should be shown
gm <- tibble::tribble(
  ~raw,        ~clean,          ~fmt,
  "nobs",      "N",             0,
  "r.squared", "R2", 2,
  "adj.r.squared", "R2 Adj.", 2)

# Create table
modelsummary(
  vcov = ~ resid,
  models,
  title = "Regression output of figure 2 in main text: Effect of interventions on protection motivation theory. \\label{tab:fig2_regout}",
  coef_map = cm,
  gof_map = gm,
  estimate  = "{estimate} ({std.error}){stars}",
  statistic = NULL,
  stars = c('*' = .05, '**' = .01, '***' = .001),
               output = "latex",
  escape = F)  %>% 
   kable_styling(latex_options = c("hold_position", 
                                   notation="none"),
                font_size = 8,
                 full_width = FALSE) %>% 
  footnote(glue("The three columns shows the regression output of figure 2 in the main text for 1) Threat appraisal, 2) Self efficacy, and 3) Response efficacy respectively. Coefficients are OLS estimates. Standard errors (in parentheses) are clustered at the respondent level. * p < 0.05, ** p < 0.01, *** p < 0.001"),
           threeparttable = TRUE) %>% 
   save_kable(file = here::here(  "fig2_regoutput.tex"))

```


### 6.2 Pre-registered analyses

#### H4-H5 - Sharing fake
```{r}

load(here::here( "final_data.Rdata")) # Load wave 

# Define stuff
p = ggplot(NULL) # Create empty list to store results
raw = list(NULL)
m = list(NULL)

# Define vars
dvs <- c("sharing_fake")
ins <-  c("index_crt", "index_socialcom", "index_trust", "index_nfc", "trust_health", "trust_govern")  
ivs <- c("treatment")

# Specificy formulas
fs <- crossing(dvs, ivs, ins)  %>%
  mutate(
    f = glue("{dvs} ~ {ivs} * {ins}")
  )

# Run apply function on dataframes
raw <- fs %>%
  mutate(model = map(f, ~ lm_robust(
    formula = as.formula(.x),
    data = df,
    clusters = resid
  ))) 

# Coef labels
cm <- c("treatmentNudge"    = "Accuracy nudge (vs. control)",
        "treatmentShort"    = "15 sec video (vs. control)",
        "treatmentLong"    = "3 min video (vs. control)",
        
        "index_crt"    = "Cognitive reflection",
        "treatmentNudge:index_crt" = "Accuracy nudge X cognitive reflection",
        "treatmentShort:index_crt" = "15 sec video X cognitive reflection",
        "treatmentLong:index_crt" = "3 min video X cognitive reflection",
        
        "index_nfc" = "Need for cognition",
        "treatmentNudge:index_nfc" = "Accuracy nudge X need for cognition",
        "treatmentShort:index_nfc" = "15 sec video X need for cognition",
        "treatmentLong:index_nfc" = "3 min video X need for cognition",

        "index_socialcom"    = "Attention to social comparison",
        "treatmentNudge:index_socialcom" = "Accuracy nudge X attention to social comparison",
        "treatmentShort:index_socialcom" = "15 sec video X attention to social comparison",
        "treatmentLong:index_socialcom" = "3 min video X attention to social comparison",
        
        "index_trust" = "Trust",
        "treatmentNudge:index_trust" = "Accuracy nudge X trust",
        "treatmentShort:index_trust" = "15 sec video X trust",
        "treatmentLong:index_trust" = "3 min video X trust",
        
        "trust_govern" = "Trust government",
        "treatmentNudge:trust_govern" = "Accuracy nudge X trust government",
        "treatmentShort:trust_govern" = "15 sec video X trust government",
        "treatmentLong:trust_govern" = "3 min video X trust government",
        
        "trust_health" = "Trust in Health Auth.",
        "treatmentNudge:trust_health" = "Accuracy nudge X trust in Health Auth.",
        "treatmentShort:trust_health" = "15 sec video X trust in Health Auth.",
        "treatmentLong:trust_health" = "3 min video X trust in Health Auth.",
        
        "(Intercept)" = "Constant")

# Define what should be shown
gm <- tibble::tribble(
  ~raw,        ~clean,          ~fmt,
  "nobs",      "N",             0,
  "r.squared", "R2", 2,
  "adj.r.squared", "R2 Adj.", 2,
  "vcov.type", "SE clusters", 0)


##  Produce table
modelsummary(
  raw[["model"]],
  title = "Effect of interventions on sharing fake news headlines interacted with covariates. \\label{tab:intervention_covariate_interaction}",
  estimate = "{estimate}{stars} ({std.error})",
  statistic = NULL,
  #vcov = ~ resid # Do you want clustered standard errors? For twoway cluster type e.g. "vcov = ~ resid + headline"
  stars = c("*" = .05, "**" = .01, "***" = .001),
  coef_map = cm, # If you didn't define cm, then put a "#" before this line
  gof_map = gm, # If you didn't define gm, then put a "#" before this line
  output = "latex") %>% 
  kable_styling(position = "center", latex_options = c("scale_down")) %>%
  kableExtra::footnote(glue("Each model shows the interaction effects between treatments and one of the covariates: Cognitive reflection, need for cognition, attention to social comparison, trust, trust in government, and trust in health authorities. Coefficients are OLS estimates. All regressions are based on observations from {raw$model[[1]]$nclus} respondents. Standard errors (in parentheses) are clustered at the respondent level. * p < 0.05, ** p < 0.01, *** p < 0.001"),
           threeparttable = TRUE) %>% 
  readr::write_lines(file = here::here(  "intervention_covariate_interaction.tex"))


```

#### H4-H5 - Sharing real

```{r}

load(here::here( "final_data.Rdata")) # Load wave 

# Define vars
dvs <- c("sharing_real")
ins <-  c("index_crt", "index_socialcom", "index_trust", "index_nfc", "trust_health", "trust_govern")  
ivs <- c("treatment")

# Specificy formulas
fs <- crossing(dvs, ivs, ins)  %>%
  mutate(
    f = glue("{dvs} ~ {ivs} * {ins}")
  )

# Run apply function on dataframes
m0 <- fs %>%
  mutate(model = map(f, ~ lm_robust(
    formula = as.formula(.x),
    data = df,
    clusters = resid
  )))


# Coef labels
cm <- c("treatmentNudge"    = "Accuracy nudge (vs. control)",
        "treatmentShort"    = "15 sec video (vs. control)",
        "treatmentLong"    = "3 min video (vs. control)",
        
        "index_crt"    = "Cognitive reflection",
        "treatmentNudge:index_crt" = "Accuracy nudge X cognitive reflection",
        "treatmentShort:index_crt" = "15 sec video X cognitive reflection",
        "treatmentLong:index_crt" = "3 min video X cognitive reflection",
        
        "index_nfc" = "Need for cognition",
        "treatmentNudge:index_nfc" = "Accuracy nudge X need for cognition",
        "treatmentShort:index_nfc" = "15 sec video X need for cognition",
        "treatmentLong:index_nfc" = "3 min video X need for cognition",

        "index_socialcom"    = "Attention to social comparison",
        "treatmentNudge:index_socialcom" = "Accuracy nudge X attention to social comparison",
        "treatmentShort:index_socialcom" = "15 sec video X attention to social comparison",
        "treatmentLong:index_socialcom" = "3 min video X attention to social comparison",
        
        "index_trust" = "Trust",
        "treatmentNudge:index_trust" = "Accuracy nudge X trust",
        "treatmentShort:index_trust" = "15 sec video X trust",
        "treatmentLong:index_trust" = "3 min video X trust",
        
        "trust_govern" = "Trust government",
        "treatmentNudge:trust_govern" = "Accuracy nudge X trust government",
        "treatmentShort:trust_govern" = "15 sec video X trust government",
        "treatmentLong:trust_govern" = "3 min video X trust government",
        
        "trust_health" = "Trust in Health Auth.",
        "treatmentNudge:trust_health" = "Accuracy nudge X trust in Health Auth.",
        "treatmentShort:trust_health" = "15 sec video X trust in Health Auth.",
        "treatmentLong:trust_health" = "3 min video X trust in Health Auth.",
        
        "(Intercept)" = "Constant")

# Define what should be shown
gm <- tibble::tribble(
  ~raw,        ~clean,          ~fmt,
  "nobs",      "N",             0,
  "r.squared", "R2", 2,
  "adj.r.squared", "R2 Adj.", 2,
  "vcov.type", "SE clusters", 0)


##  Produce table
modelsummary(
  m0$model,
  title = "Effect of interventions on sharing real news headlines interacted with covariates. \\label{tab:intervention_covariate_interaction_real}",
  estimate = "{estimate}{stars} ({std.error})",
  statistic = NULL,
  #vcov = ~ resid # Do you want clustered standard errors? For twoway cluster type e.g. "vcov = ~ resid + headline"
  stars = c("*" = .05, "**" = .01, "***" = .001),
  coef_map = cm, # If you didn't define cm, then put a "#" before this line
  gof_map = gm, # If you didn't define gm, then put a "#" before this line
  output = "latex") %>% 
  kable_styling(position = "center", latex_options = c("scale_down")) %>%
  footnote(glue("Each model shows the interaction effects between treatments and one of the covariates: Cognitive reflection, need for cognition, attention to social comparison, trust, trust in government, and trust in health authorities. Coefficients are OLS estimates. All regressions are based on observations from {m0$model[[1]]$nclus} respondents. Standard errors (in parentheses) are clustered at the respondent level. * p < 0.05, ** p < 0.01, *** p < 0.001"),
           threeparttable = TRUE) %>% 
  readr::write_lines(file = here::here(  "intervention_covariate_interaction_real.tex"))


```

#### H4-H5 - Sharing discernment
```{r}

load(here::here( "final_data.Rdata")) # Load wave 

# calculate sharing discernment
df <- df %>% 
  group_by(resid) %>% 
  mutate(
    mean_all = mean(sharing_all, na.rm = T),
    mean_fake = mean(sharing_fake, na.rm = T),
    mean_real = mean(sharing_real, na.rm = T)
  ) %>% 
  mutate(sharing_disc = mean_real-mean_fake, na.rm = T)

# pivot
df <- df %>%
  pivot_wider(
    id_cols = c(resid, starts_with("mean_"), starts_with("index_"), sharing_disc, starts_with("trust_"), treatment),
    names_from = headline,
    values_from = c(sharing_all,sharing_fake, sharing_real)
  )

# Define vars
dvs <- c("sharing_disc")
ins <-  c("index_crt", "index_socialcom", "index_trust", "index_nfc", "trust_health", "trust_govern")  
ivs <- c("treatment")

# Specificy formulas
fs <- crossing(dvs, ivs, ins)  %>%
  mutate(
    f = glue("{dvs} ~ {ivs} * {ins}")
  )

# Run apply function on dataframes
m0 <- fs %>%
  mutate(model = map(f, ~ lm_robust(
    formula = as.formula(.x),
    data = df,
    clusters = resid
  )))


# Coef labels
cm <- c("treatmentNudge"    = "Accuracy nudge (vs. control)",
        "treatmentShort"    = "15 sec video (vs. control)",
        "treatmentLong"    = "3 min video (vs. control)",
        
        "index_crt"    = "Cognitive reflection",
        "treatmentNudge:index_crt" = "Accuracy nudge X cognitive reflection",
        "treatmentShort:index_crt" = "15 sec video X cognitive reflection",
        "treatmentLong:index_crt" = "3 min video X cognitive reflection",
        
        "index_nfc" = "Need for cognition",
        "treatmentNudge:index_nfc" = "Accuracy nudge X need for cognition",
        "treatmentShort:index_nfc" = "15 sec video X need for cognition",
        "treatmentLong:index_nfc" = "3 min video X need for cognition",

        "index_socialcom"    = "Attention to social comparison",
        "treatmentNudge:index_socialcom" = "Accuracy nudge X attention to social comparison",
        "treatmentShort:index_socialcom" = "15 sec video X attention to social comparison",
        "treatmentLong:index_socialcom" = "3 min video X attention to social comparison",
        
        "index_trust" = "Trust",
        "treatmentNudge:index_trust" = "Accuracy nudge X trust",
        "treatmentShort:index_trust" = "15 sec video X trust",
        "treatmentLong:index_trust" = "3 min video X trust",
        
        "trust_govern" = "Trust government",
        "treatmentNudge:trust_govern" = "Accuracy nudge X trust government",
        "treatmentShort:trust_govern" = "15 sec video X trust government",
        "treatmentLong:trust_govern" = "3 min video X trust government",
        
        "trust_health" = "Trust in Health Auth.",
        "treatmentNudge:trust_health" = "Accuracy nudge X trust in Health Auth.",
        "treatmentShort:trust_health" = "15 sec video X trust in Health Auth.",
        "treatmentLong:trust_health" = "3 min video X trust in Health Auth.",
        
        "(Intercept)" = "Constant")

# Define what should be shown
gm <- tibble::tribble(
  ~raw,        ~clean,          ~fmt,
  "nobs",      "N",             0,
  "r.squared", "R2", 2,
  "adj.r.squared", "R2 Adj.", 2,
  "vcov.type", "SE clusters", 0)


##  Produce table
modelsummary(
  m0$model,
  title = "Effect of interventions on sharing discernment interacted with covariates. \\label{tab:intervention_covariate_interaction_disc}",
  estimate = "{estimate}{stars} ({std.error})",
  statistic = NULL,
  #vcov = ~ resid # Do you want clustered standard errors? For twoway cluster type e.g. "vcov = ~ resid + headline"
  stars = c("*" = .05, "**" = .01, "***" = .001),
  coef_map = cm, # If you didn't define cm, then put a "#" before this line
  gof_map = gm, # If you didn't define gm, then put a "#" before this line
  output = "latex") %>% 
  kable_styling(position = "center", latex_options = c("scale_down")) %>%
  footnote(glue("Each model shows the interaction effects between treatments and one of the covariates: Cognitive reflection, need for cognition, attention to social comparison, trust, trust in government, and trust in health authorities. Coefficients are OLS estimates. * p < 0.05, ** p < 0.01, *** p < 0.001"),
           threeparttable = TRUE) %>% 
  readr::write_lines(file = here::here(  "intervention_covariate_interaction_disc.tex"))


```

#### H6 - Decay
```{r}
load(here::here( "final_data.Rdata")) # Load wave 

# Define vars
dvs <- c("sharing_fake", "sharing_real")
ivs <-  c("treatment")
ins <-  c("order")  


# Specificy formulas
fs <- crossing(dvs, ivs, ins)  %>%
  mutate(
    f = glue("{dvs} ~ {ivs} * {ins}")
  )

# Apply lm function on formulas, create tidy dataframe 3) filter relevant coefficients
m0 <- fs %>%
  mutate(model = map(f, ~ lm_robust(
    formula = as.formula(.x),
    data = df,
    clusters = resid
  ))) 

# Glimpse
m0[[5]] %>% map(tidy) %>% map_if(is.data.frame, ~mutate_if(.x, is.numeric, round, 3))

nclus <- m0[["model"]][[1]][["nclusters"]]

# Regression table
models <- list(
  "False headline sharing" = m0$model[[1]],
  "Real headline sharing" = m0$model[[2]])

cm <- c("treatmentNudge"    = "Accuracy nudge (vs. control)",
        "treatmentShort"    = "15 sec video (vs. control)",
        "treatmentLong"    = "3 min video (vs. control)",
        
        "order"    = "Task order",
        "treatmentNudge:order" = "Accuracy nudge X task order",
        "treatmentShort:order" = "15 sec video X task order",
        "treatmentLong:order" = "3 min video X task order",
        
        "(Intercept)" = "Constant")

# Define what should be shown
gm <- tibble::tribble(
  ~raw,        ~clean,          ~fmt,
  "nobs",      "N",             0,
  "r.squared", "R2", 2,
  "adj.r.squared", "R2 Adj.", 2,
  "vcov.type", "SE clusters", 0)


##  Produce table
modelsummary(models,
  title = "Effect of interventions on sharing fake and real news headlines interacted with number of tasks completed. \\label{tab:H6}",
  estimate = "{estimate}{stars} ({std.error})",
  statistic = NULL,
  #vcov = ~ resid # Do you want clustered standard errors? For twoway cluster type e.g. "vcov = ~ resid + headline"
  stars = c("*" = .05, "**" = .01, "***" = .001),
  coef_map = cm, # If you didn't define cm, then put a "#" before this line
  gof_map = gm, # If you didn't define gm, then put a "#" before this line
  output = "latex") %>% 
  kable_styling(position = "center") %>%
  footnote(glue("Coefficients are OLS estimates. All regressions are based on observations from {nclus} respondents. Standard errors (in parentheses) are clustered at the respondent level. * p < 0.05, ** p < 0.01, *** p < 0.001"),
           threeparttable = TRUE) %>% 
  readr::write_lines(file = here::here(  "H6.tex"))

# Produce figure sharing_fake

g <- df %>%
  group_by(treatment, order) %>%
  get_summary_stats(sharing_fake, type = "mean_ci") %>%
  ggplot(aes(
    x = order,
    y = mean,
    group = treatment,
    colour = treatment
  ))
 
p <- g + geom_point()+
  facet_wrap(~treatment)+
  geom_smooth(method = "loess") +
  scale_y_continuous(limits = c(0,.4)) +
  ylab("Sharing fake headlines") +
  xlab("# Task") +
  theme(legend.position="none")

p

pdf(here::here( "0_study1_H6_fake.pdf"),
    height = 6,
    width = 7)
print(p)
dev.off()

ggsave(
  here::here( "0_study1_H6_fake.png"),
  height = 6,
  width = 7)

# Produce figure sharing_real

g <- df %>%
  group_by(treatment, order) %>%
  get_summary_stats(sharing_real, type = "mean_ci") %>%
  ggplot(aes(
    x = order,
    y = mean,
    group = treatment,
    colour = treatment
  ))
 
p <- g + geom_point()+
  facet_wrap(~treatment)+
  geom_smooth(method = "loess") +
  scale_y_continuous(limits = c(0,.4)) +
  ylab("Sharing real headlines") +
  xlab("# Task ") +
  theme(legend.position="none")

p

pdf(here::here( "0_study1_H6_real.pdf"),
    height = 6,
    width = 7)
print(p)
dev.off()

ggsave(
  here::here( "0_study1_H6_real.png"),
  height = 6,
  width = 7)

```


### 6.3 Additional supplementary analyses

#### 6.3.1 Predictors of headline sharing - indices

This plot shows regression coefficients for index for the outcomes 1) sharing all headlines, 2) sharing fake headlines, 3) sharing real headlines. **Conclusion**: None of the predictors significantly predict sharing


**Predictors** 

- Cognitive reflection
- Need for cognition
- Attention to social comparison information
- Trust (as a mean of all the trust measures)
- Trust in government
- Trust in public health authorities.

**Controls**

- Gender
- Age
- Income
- Education 

```{r}
load(here::here("final_data.Rdata")) # Load study 1

df <- df %>% 
  group_by(resid) %>% 
  mutate(
    mean_all = mean(sharing_all, na.rm = T),
    mean_fake = mean(sharing_fake, na.rm = T),
    mean_real = mean(sharing_real, na.rm = T)
  ) %>% 
  mutate(sharing_disc = mean_real-mean_fake, na.rm = T)

# Define vars
dvs <- c("sharing_fake", "sharing_real", "sharing_all", "sharing_disc")
ivs <-  c("index_crt", "index_socialcom", "index_nfc", "index_trust", "trust_health", "trust_govern", "trust_science")  
controls <-   c("female", "age4","income_hh","educ3", "vote_next2")

# Specificy formulas
fs <- crossing(dvs, ivs)  %>% 
  mutate(f = glue("{dvs} ~ {ivs} + {paste(controls, collapse = '+' )}")) 

# Apply lm function on formulas, create tidy dataframe 3) filter relevant coefficients
m <- fs %>%
  mutate(model = map(f, ~ lm_robust(formula = as.formula(.x), data = df, clusters = resid))) %>% 
  mutate(tidy_model = map(model, broom::tidy)) %>% 
  unnest(tidy_model) %>% 
  # This is just some cleaning if I want to plit the graph
  filter(
    term %in% c("index_crt", "index_socialcom", "index_nfc", "trust_health", "trust_govern", "trust_science") 
  ) %>% 
  mutate(
    where = as.factor(case_when(
    term %in% c("female", "age435-54", "age455-69","age470+","income_hh","educ3", "vote_next2") ~ "Demographics",
    term %in% c("index_crt", "index_socialcom", "index_nfc", 
                "index_trust", "trust_health", "trust_govern", "trust_science") ~ "Indices")),
    term = case_when(
      term == "index_crt" ~ "Cognitive reflection",
      term == "index_socialcom" ~ "Attention to social comparison",
      term == "index_nfc" ~ "Need for cognition",
      term == "trust_health" ~ "Trust health authorities",
      term == "trust_govern" ~ "Trust government",
      term == "trust_science" ~ "Trust scientists"),
    outcome = factor(
      outcome,
      levels = c("sharing_all",
                 "sharing_fake",
                 "sharing_real",
                 "sharing_disc"),
      labels = c("All headlines",
                 "False headlines",
                 "Real headlines",
                 "Sharing discernment")
    ))

  
# Create plot
m %>% 
  ggplot(aes(x = estimate, y = term, # Punktestimat
             xmin= conf.low, xmax = conf.high # Konfidensintervaller
            ))+ # Gruppe markør
  facet_wrap(~outcome, ncol = 4) +
  geom_point()+
  geom_pointrange(size = 0.3)+
  geom_vline(xintercept = 0)+
  geom_text(aes(label = sprintf("%0.2f (%0.2f)", estimate, std.error)),
            vjust = 2,
            position = position_dodge(width = 1),
            size = 3)+
  scale_x_continuous(limits = c(-.15,.15))+
  labs(y = "",
       x = "Sharing intentions (and discernment real-false headlines")

ggsave(
  here::here( "0_study1_predictors_indices.pdf"), height = 4, width = 6)

```

#### 6.3.2 Predictors of headline sharing - demographics

```{r}
load(here::here("final_data.Rdata")) # Load study 1


# Pivot
d = df %>%
  group_by(resid) %>% 
  mutate(
    mean_all = mean(sharing_all, na.rm = T),
    mean_fake = mean(sharing_fake, na.rm = T),
    mean_real = mean(sharing_real, na.rm = T)
  ) %>% 
  mutate(sharing_disc = mean_real-mean_fake, na.rm = T) %>% 
  ungroup() %>% 
  pivot_wider(
    id_cols = c(resid, 
                female,
                educ3,
                vote_next2,
                age4,
                income_hh,
                starts_with("mean_"), starts_with("index_"), sharing_disc, starts_with("trust_"), treatment),
    names_from = headline,
    values_from = c(sharing_all,sharing_fake, sharing_real)) %>% 
  select(!starts_with(c("sharing_all_","sharing_fake_","sharing_real_"))) %>%
  mutate(educ3 = haven::as_factor(educ3),
         income_cat = as.factor(case_when(income_hh %in% c(1:4) ~ 1,
                                income_hh %in% c(5:8) ~ 2,
                                income_hh %in% c(9:11) ~ 3)))

# Define vars
dvs <- c("mean_fake", "mean_real", "sharing_disc")
ivs <-   c("female", "age4","income_cat","educ3", "vote_next2")

# Specificy formulas
fs = crossing(dvs, ivs)  %>% 
  mutate(f = glue("{dvs} ~ {ivs}")) 

# Apply lm function on formulas, create tidy dataframe 3) filter relevant coefficients
m0 = fs %>%
  mutate(model = map(f, ~ lm(formula = as.formula(.x),
                             data = d)))

# Pull the "model" column and save data
m_temp = m0 %>% 
  pull(model) %>% 
  modelsummary(output = "modelsummary_list",
               vcov =  ~ resid,
               statistic = "conf.int") 

m_long = list(NULL)

# tidy each regression and bind them together
for (i in 1:length(m_temp)){
  m_temp[[i]] = m_temp[[i]] %>% tidy() %>%
    mutate(model = ifelse(i %in% 1:5, "False headlines",
                          ifelse(i %in% 6:10, "Real headlines",
                                 ifelse(i %in% 11:15, "Sharing discernment",
                                        NA))))
  m_long = rbind(m_long, m_temp[[i]])
}

m_long %>%
  filter(term != "(Intercept)") %>% 
  mutate(term = recode(
    term,
    "age435-54" = "Age 35-54 (vs. 18-34)",
    "age455-69" = "Age 55-69 (vs. 18-34)",
    "age470+" = "Age 70+ (vs. 18-34)",
    "educ32" = "Education: \nShort/medium-cycle \n (vs. Upper secondary)",
    "educ33" = "Education: \nLong cycle \n (vs. Upper secondary)",
    "female" = "Female",
    "income_cat2" = "Income: \n400,000-799,999 kr.\n(vs. 0-399,999 kr.)",
    "income_cat3" = "Income: \n800,000+ kr.\n(vs. 0-399,999 kr.)",
    "vote_next2" = "Right-wing"
  )) %>% 
  ggplot(aes(x = estimate, y = term, # Punktestimat
             xmin= conf.low, xmax = conf.high # Konfidensintervaller
  ))+ # Gruppe markør
  facet_wrap(~model, ncol = 4) +
  geom_point()+
  geom_pointrange(size = 0.3)+
  
  geom_vline(xintercept = 0)+
  geom_text(aes(label = sprintf("%0.2f (%0.2f)", estimate, std.error)),
            vjust = 2,
            position = position_dodge(width = 1),
            size = 3)+
  scale_x_continuous(limits = c(-.15,.15))+
  labs(y = "",
       x = "Sharing intentions (and discernment real-false headlines)")+
  theme(text = element_text(size = 10)) 
  

ggsave(
  here::here( "0_study1_predictors_demographics.pdf"), height = 4, width = 6)

```

#### 6.3.3 Partisanship interaction
```{r}

# Partisanship 

load(here::here( "final_data.Rdata")) # Load wave 

# Define stuff
p = ggplot(NULL) # Create empty list to store results
raw = list(NULL)
m = list(NULL)

# Define vars
dvs <- c("sharing_fake", "sharing_real")
ins <-  c("vote_next2")  
ivs <- c("treatment")

# Specificy formulas
fs <- crossing(dvs, ivs, ins)  %>%
  mutate(
    f = glue("{dvs} ~ {ivs} * {ins}")
  )

# Run apply function on dataframes
for (i in 1:length(df)){
  raw <- fs %>%
    mutate(model = map(f, ~ lm(
      formula = as.formula(.x),
      data = df
    ))) 
}


# Coef labels
cm <- c("treatmentNudge"    = "Accuracy nudge (vs. control)",
        "treatmentShort"    = "15 sec video (vs. control)",
        "treatmentLong"    = "3 min video (vs. control)",
        
        "vote_next2"    = "Partishanship (Right-wing)",
        "treatmentNudge:vote_next2" = "Accuracy nudge X\n Partishanship (Right-wing)",
        "treatmentShort:vote_next2" = "15 sec video X\n Partishanship (Right-wing)",
        "treatmentLong:vote_next2" = "3 min video X\n Partishanship (Right-wing)",
        
        "(Intercept)" = "Constant")

# Define what should be shown
gm <- tibble::tribble(
  ~raw,        ~clean,          ~fmt,
  "nobs",      "N",             0,
  "r.squared", "R2", 2,
  "adj.r.squared", "R2 Adj.", 2,
  "vcov.type", "SE clusters", 0)


# Define the three models
models <- list(
  "False headline sharing" = raw$model[[1]],
  "Real headline sharing" = raw$model[[2]])


##  Produce table
modelsummary(models,
  title = "Effect of interventions X partisanship on sharing. \\label{tab:partisan_interact}",
  vcov = ~ resid + headline,
  stars = c("*" = .05, "**" = .01, "***" = .001),
  coef_map = cm, 
  gof_map = gm,
  output = "latex") %>% 
  kable_styling(position = "center", 
                latex_options = c("scale_down"),
                font_size = 7,
               full_width = FALSE) %>% 
  readr::write_lines(file = here::here(  "intervention_partisanship_interaction.tex"))



```


#### 6.3.4 Are threat appraisal, self-efficacy, and response efficacy related to greater
sharing discernment?

```{r}
load(here::here( "wave3_data.Rdata")) # Load data

df <- wave3

# Define vars
dvs <- c("mean_fake", "mean_real","sharing_disc")
ivs <-  c(
"respeff",
"selfeff",
"threat")  

# Specificy formulas
fs <- crossing(dvs, ivs)  %>%
  mutate(f = glue("{dvs} ~ {ivs}"))


# Calculate means and sharing discernment
df <- df %>% 
  group_by(resid) %>% 
  mutate(
    mean_all = mean(sharing_all, na.rm = T),
    mean_fake = mean(sharing_fake, na.rm = T),
    mean_real = mean(sharing_real, na.rm = T)
  ) %>% 
  mutate(sharing_disc = mean_real-mean_fake, na.rm = T)

# Widen dataset, as we change the unit of analysis from headline to respondent

df = df %>%
  pivot_wider(
    id_cols = c(
      resid,
      starts_with("mean_"),
      starts_with("index_"),
      resid,
      threat,
      selfeff,
      respeff,
      treatment,
      sharing_disc
    ),
    names_from = headline,
    values_from = c(sharing_all, sharing_fake, sharing_real)
  ) 


# Apply lm function on formulas, create tidy dataframe 3) filter relevant coefficients
m0 <- fs %>%
  mutate(model = map(f, ~ lm_robust(
    formula = as.formula(.x),
    data = df, 
    clusters = resid
  )))  


# Edit model for visualization
m <- m0 %>% 
  mutate(tidy_model = map(model, broom::tidy)) %>% 
  unnest(tidy_model) %>% 
  filter(term != "(Intercept)") %>% 
  mutate(term = fct_relevel(term, "threat", "selfeff","respeff"),
         outcome = fct_relevel(outcome, "mean_real","mean_fake","sharing_disc"))

# Create model
ggplot(data = m, aes(x = estimate, y = term, group = outcome)) + 
  geom_point() +
  geom_errorbar(aes(xmin = conf.low, xmax = conf.high), width = 0) +
  facet_wrap(~ outcome, ncol = 1) +
  
  geom_vline(aes(xintercept = 0),
             colour = "darkgrey",
             size = 1) +
  geom_text(aes(label = sprintf("%0.2f (%0.2f)", estimate, std.error)),
            vjust = 2,
            position = position_dodge(width = 1),
            size = 3)+
  scale_x_continuous(limits = c(-0.29, .19))+
  scale_y_discrete(labels = c("threat" = "Threat appraisal",
                              "selfeff" = "Self-efficacy",
                            "respeff" = "Response efficacy"))+
  
  labs(y = "",
       x = "Sharing discernment")

ggsave(
 here::here( "0_pmt_sharing_discernment.pdf"), height = 6, width = 4)

```

#### 6.3.5 Replication of treatment effects in Study 2
Suggested by editor and reviewer (comment 8)
```{r}
load(here::here( "wave3_data.Rdata")) # Load wave 

df <- wave3 %>% as.data.frame() 

# I also want to add an interaction term, which I do below. 
# Define vars
dvs <- c("sharing_fake", "sharing_real")
ivs <-  c("treatment")


# Specificy formulas
fs <- crossing(dvs, ivs)  %>%
  mutate(f = glue("{dvs} ~ {ivs}")) %>% 
  # Add row with interaction term
  add_row(f = glue("sharing_all ~ {ivs}*real"))


# Run regression for all models by applying lm function on formulas
m0 <- fs %>%
  mutate(model = map(f, ~ feols(
    fml = as.formula(.x),
    data = df,
    cluster = c("resid", "headline")
  ))) 

# Get coefficients
m0$model %>% map(tidy) %>% 
  map_if(is.data.frame, ~mutate_if(.x, is.numeric, round, 3))

no_obs <- m0[["model"]][[1]][["nobs"]]

# calculate mean sharing discernment and collapse by respondent
df_disc <- df %>%
  group_by(resid, treatment) %>%
  summarise(sharing_disc = mean(sharing_real, na.rm = T)-mean(sharing_fake, na.rm = T)) 

# Define vars
dvs <- c("sharing_disc")
ivs <-  c("treatment")

# Specificy formulas
fs <- crossing(dvs, ivs)  %>%
  mutate(f = glue("{dvs} ~ {ivs}"))

# Apply lm function on formulas, create tidy dataframe 3) filter relevant coefficients
m0_disc <- fs %>%
  mutate(model = map(f, ~ feols(
    fml = as.formula(.x),
    data = df_disc,
    cluster = c("resid")
  ))) 

m0_disc$model %>% map(tidy) %>% 
  map_if(is.data.frame, ~mutate_if(.x, is.numeric, round, 3))

# combine models
m0 <- rbind(m0, m0_disc)

# Define the three models
models <- list(
  "False headline sharing" = m0$model[[1]],
  "Real headline sharing" = m0$model[[2]],
  "Sharing discernment" = m0$model[[4]],
  "All headlines (Intervention X veracity)" = m0$model[[3]]
)


# Get some statistics
temp = models %>% modelsummary(statistic = "conf.int", output = "modelsummary_list") %>% map(tidy)  
    
# Round to 3 digits
temp %>%
  map_if(is.data.frame, ~mutate_if(.x, is.numeric, round, 3))

#%>% 
#  .$`Sharing discernment` %>% 
#  mutate(across(.cols = where(is.numeric),
#                # anonymous function
#                .fns = \(x) round(x, digits = 3)))


# This edits the labels (not necessary)
cm <- c(
  "treatmentNudge" = "Accuracy nudge (vs. control)",
  "treatmentShort" = "15 sec video (vs. control)",
  "treatmentLong" = "3 min video (vs. control)",
  "realreal" = "Headline veracity (0 = false, 1 = true)",
  "treatmentNudge:realreal" = "Accuracy nudge X veracity",
  "treatmentShort:realreal" = "15 sec video X veracity",
  "treatmentLong:realreal" = "3 min video X veracity",
  '(Intercept)' = 'Constant'
)

# Define what should be shown
gm <- tibble::tribble(
  ~raw,        ~clean,          ~fmt,
  "nobs",      "N",             0,
  "r.squared", "R2", 2,
  "adj.r.squared", "R2 Adj.", 2)

# Create table
modelsummary(
  models,
  title = "Regression output of figure \\ref{fig:study2_treatment_replication}: Willingness to share headlines (Study 2). \\label{tab:study2_treatment_replication}",
  coef_map = cm,
  gof_map = gm,
  estimate  = "{estimate} ({std.error}){stars}",
  statistic = NULL,
  stars = c('*' = .05, '**' = .01, '***' = .001),
               output = "latex",
  escape = F) %>% 
  kable_styling(latex_options = c("hold_position",
                                  "scale_down",
                                   notation="none"),
                font_size = 7,
                 full_width = FALSE) %>% 
  footnote(glue("'False headline sharing' shows the regression output of the left panel in figure 9. 'Real headline sharing' shows the regression output of the middle panel. 'Sharing discernment' shows the regression output of the right panel. The discernment score is calculated as the respondent level difference between mean real and false headline sharing, that is, sharing real - sharing false. 'All headlines (Treatment X veracity)' shows a regression where all headlines are included with the interaction of the treatment interventions and the veracity of the headline on sharing intention. Coefficients are OLS estimates. Standard errors (in parentheses) are clustered at the respondent level. All regressions are based on observations from {no_obs} participants * p < 0.05, ** p < 0.01, *** p < 0.001"),
           threeparttable = TRUE) %>% 
  save_kable(file = here::here(  "study2_treatment_replication.tex"))


#### Plot ####

# Define the three models
models <- list(
  "False headlines" = m0$model[[1]],
  "Real headlines" = m0$model[[2]],
  "Sharing discernment" = m0$model[[4]]
)

m = modelplot(models, draw = FALSE) %>% 
    filter(term != "(Intercept)") %>% 
  mutate(
  term = factor(
    term,
    levels = c("treatmentLong",
               "treatmentShort",
               "treatmentNudge"),
    labels = c("3 min video",
               "15 sec video",
               "Accuracy nudge")
  ))

# Create plot
m %>%
  ggplot(aes(x = estimate,
             y = term)) +
  geom_vline(xintercept = 0) +
  geom_point() +
  geom_errorbarh(
    aes(xmin = conf.low, xmax = conf.high),
    height = 0) +
  geom_text(aes(label = sprintf("%0.2f (%0.2f)", estimate, std.error)),
            vjust = 2,
            position = position_dodge(width = 1),
            size = 2)+
  scale_x_continuous(
    limits = c(-.11, .11),
    labels = dropLeadingZero)+
    facet_wrap( ~ model, ncol = 3L) +
  xlab("") +
  ylab("")


ggsave(
  here::here( "study2_treatment_replication.pdf"),
  height = 2,
  width = 6)

```

##### Marginal means for experimental conditions in study 1 and study 2
Here we show that the baseline for the control condition moves

```{r}
# Load datasets
load(here::here( "final_data.Rdata")) # Load wave 
wave2 = df

load(here::here( "wave3_data.Rdata")) # Load wave 
wave3 = wave3 %>% as.data.frame()


# Calculate cohen's d
wave2 %>% cohens_d(sharing_fake ~ treatment, var.equal = TRUE)

wave3 %>% cohens_d(sharing_fake ~ treatment, var.equal = TRUE)

# Define vars
dvs <- c("sharing_fake", "sharing_real")
ivs <-  c("treatment")
df <- list("wave2"= wave2, "wave2" = wave3)

# Specificy formulas
fs <- crossing(dvs, ivs)  %>%
  mutate(f = glue("{dvs} ~ {ivs}"))

# Apply lm function on formulas, create tidy dataframe 3) filter relevant coefficients

# Run apply funciton on dataframes
raw = list(NULL)

for (i in 1:length(df)){
raw[[i]] <- fs %>%
  mutate(model = map(f, ~ lm_robust(
    formula = as.formula(.x),
    data = df[[i]],
    clusters = resid
  ))) %>% 
  pull(model) 
}

############################ T-test# ###############################

# Perform two-sample t-test

# T-test for control
# Start by filtering to keep control condition
group2 = wave2 %>% 
  filter(treatment == "Control")
group3 = wave3 %>% 
  filter(treatment == "Control")

# t-test
c_f = t.test(group2$sharing_fake, group3$sharing_fake, var.equal=TRUE) %>% tidy() %>% mutate(model = "Control False")
c_r = t.test(group2$sharing_real, group3$sharing_real, var.equal=TRUE) %>% tidy() %>% mutate(model = "Control Real")

# T-test for Long
group2 = wave2 %>% 
  filter(treatment == "Long")
group3 = wave3 %>% 
  filter(treatment == "Long")

l_f = t.test(group2$sharing_fake, group3$sharing_fake, var.equal=TRUE) %>% tidy() %>% mutate(model = "Long False")
l_r = t.test(group2$sharing_real, group3$sharing_real, var.equal=TRUE) %>% tidy() %>% mutate(model = "Long Real")

rbind(c_f, c_r, l_f,l_r) %>% select(model, everything())

d = NULL

#### Combine to a graph 

# Wave 2 (study 1) false 
w2f = predictions(raw[[1]][[1]], by = "treatment") %>% tidy() %>% 
  as.data.frame() %>% 
  mutate(sharing = "False",
         Study = "Study 1")

# Wave 2 (study 1) real 
w2r = predictions(raw[[1]][[2]], by = "treatment") %>% tidy() %>% 
  as.data.frame() %>% 
  mutate(sharing = "Real",
         Study = "Study 1")

# Wave 3 (study 2) false 
w3f = predictions(raw[[2]][[1]], by = "treatment") %>% tidy() %>% 
  as.data.frame() %>% 
  mutate(sharing = "False",
         Study = "Study 2")

# Wave 3 (study 2) real 
w3r = predictions(raw[[2]][[2]], by = "treatment") %>% tidy() %>% 
  as.data.frame() %>% 
  mutate(sharing = "Real",
         Study = "Study 2")

# Combine data to grpah
d = rbind(w2f,
          w2r,
          w3f,
          w3r) %>%   
  #rename_with(~str_remove(., "^treatment\\."), everything()) %>% 
  mutate(treatment = factor(
      rowid,
      levels = c(1, 2, 3, 4),
      labels = c("Control",
                 "Accuracy nudge",
                 "15 sec video",
                 "3 min video")))

# Create plot
d %>% ggplot(
  aes(
    x = treatment,
    y = estimate,
    color = Study
  ))+
  facet_wrap(~sharing)+
  geom_point(position = position_dodge(width = 0.2))+
  geom_errorbar(aes(ymin=conf.low, ymax=conf.high), 
                linewidth=.5, width=.2,
                position = position_dodge(0.2))+
  geom_text(aes(label = sprintf("%0.2f (%0.2f)", estimate, std.error)),
            vjust = 2,
            position = position_dodge(width = 1),
            size = 2)

ggsave(
  here::here( "study2_treatment_replication_w2_w3_mm.pdf"),
  height = 3,
  width = 6)
```


##### Effect size
```{r}
# Load data
load(here::here( "wave3_data.Rdata")) # Load wave 

df <- wave3 %>% as.data.frame

# Calculate cohen's d

# false sharing
df %>%
  mutate(treatment = fct_rev(treatment)) %>% 
  cohens_d(sharing_fake ~ treatment,
           var.equal = TRUE) %>% 
  filter(group2 == "Control") %>% 
  pull(effsize, group1) %>% 
  round(digits = 2)

# real sharing
df %>%
  mutate(treatment = fct_rev(treatment)) %>% 
  cohens_d(sharing_real ~ treatment,
           var.equal = TRUE) %>% 
  filter(group2 == "Control") %>% 
  pull(effsize, group1) %>% 
  round(digits = 2)


df %>%
  group_by(resid, treatment) %>%
  summarise(sharing_disc = mean(sharing_real, na.rm = T)-mean(sharing_fake, na.rm = T)) %>% 
  ungroup() %>% 
  mutate(treatment = fct_rev(treatment)) %>% 
  cohens_d(sharing_disc ~ treatment, var.equal = TRUE) %>%  
  mutate_if(is.numeric, ~round(., 3)) %>% 
  filter(group2 == "Control") %>% 
  pull(effsize,group1)
  
```

#### 6.3.6 Subset on attention check and social media users
##### Descriptives for attention checks

```{r}
load(here::here( "final_data.Rdata")) # Load wave 

# Pivot
df <- df %>%
  pivot_wider(
    id_cols = c(resid, starts_with("mean_"), starts_with("attention"), treatment),
    names_from = headline,
    values_from = c(sharing_all,sharing_fake, sharing_real)
  ) %>% select(!starts_with("sharing_"))

# Attention check 1
df <- df %>% 
  mutate(advice3 = 
           case_when(
            attention1_1 == 1 & attention1_3 == 1 & attention1_4 == 1 ~ 3,
            attention1_1 == 1 & attention1_3 == 1 | 
                attention1_3 == 1 & attention1_4 == 1 |
                attention1_1 == 1 & attention1_4 == 1 ~ 2,
            attention1_1 == 1 | attention1_3 == 1 | attention1_4 == 1 ~ 1,
            attention1_1 == 0 | attention1_3 == 0 | attention1_4 == 0 ~ 0),
         attention1_pass = if_else(advice3 > 0, 1, 0))

table(df$advice3)

table(df$attention1_pass)
mean(df$attention1_pass, na.rm = T)

datasummary_skim(df)

# Attention check 2

# Create attention test pass variable
df <- df %>%
  mutate(attention2_pass = if_else(attention2 == 1 & treatment == "Control" |
                              attention2 == 1 & treatment == "Nudge" |
                              attention2 == 2 & treatment == "Short" |
                              attention2 == 3 & treatment == "Long", 1, 0))


# Test how many respondents passed attention check
mean(df$attention2_pass, na.rm = T)
table(df$attention2_pass)

```

##### Regression among attentive respondents

```{r}
load(here::here( "final_data.Rdata")) # Load wave 

df <- df %>%
  mutate(attention2_pass = if_else(attention2 == 1 & treatment == "Control" |
                              attention2 == 1 & treatment == "Nudge" |
                              attention2 == 2 & treatment == "Short" |
                              attention2 == 3 & treatment == "Long", 1, 0))


df <- df %>%
  filter(attention2_pass == 1)

# I also want to add an interaction term, which I do below. 
# Define vars
dvs <- c("sharing_fake", "sharing_real")
ivs <-  c("treatment")

# Specificy formulas
fs <- crossing(dvs, ivs)  %>%
  mutate(f = glue("{dvs} ~ {ivs}")) %>% 
  # Add row with interaction term
  add_row(f = glue("sharing_all ~ {ivs}*real"))

# Run regression for all models by applying lm function on formulas
m0 <- fs %>%
  mutate(model = map(f, ~ lm_robust(
    formula = as.formula(.x),
    data = df,
    clusters = resid
  ))) 

nclus <- m0[["model"]][[1]][["nclusters"]]

# calculate mean sharing discernment and collapse by respondent
df_disc <- df %>%
  group_by(resid, treatment) %>%
  summarise(sharing_disc = mean(sharing_real, na.rm = T)-mean(sharing_fake, na.rm = T))


# Define vars
dvs <- c("sharing_disc")
ivs <-  c("treatment")

# Specificy formulas
fs <- crossing(dvs, ivs)  %>%
  mutate(f = glue("{dvs} ~ {ivs}"))

# Apply lm function on formulas, create tidy dataframe 3) filter relevant coefficients
m0_disc <- fs %>%
  mutate(model = map(f, ~ lm_robust(
    formula = as.formula(.x),
    data = df_disc,
    clusters = resid
  ))) 

# combine models
m0 <- rbind(m0, m0_disc)

# Define the three models
models <- list(
  "False headline sharing" = m0$model[[1]],
  "Real headline sharing" = m0$model[[2]],
  "Sharing discernment" = m0$model[[4]],
  "All headlines (Intervention X veracity)" = m0$model[[3]]
)

# This edits the labels (not necessary)
cm <- c(
  "treatmentNudge" = "Accuracy nudge (vs. control)",
  "treatmentShort" = "15 sec video (vs. control)",
  "treatmentLong" = "3 min video (vs. control)",
  "realreal" = "Headline veracity (0 = false, 1 = true)",
  "treatmentNudge:realreal" = "Accuracy nudge X veracity",
  "treatmentShort:realreal" = "15 sec video X veracity",
  "treatmentLong:realreal" = "3 min video X veracity",
  '(Intercept)' = 'Constant'
)


# Define what should be shown
gm <- tibble::tribble(
  ~raw,        ~clean,          ~fmt,
  "nobs",      "N",             0,
  "r.squared", "R2", 2,
  "adj.r.squared", "R2 Adj.", 2,
  "vcov.type", "SE clusters", 0)

# Create table
modelsummary(
  models,
  title = "Willingness to share headlines among attentive respondents. \\label{tab:fig1_regout_attention}",
  coef_map = cm,
  gof_map = gm,
  estimate  = "{estimate} ({std.error}){stars}",
  statistic = NULL,
  stars = c('*' = .05, '**' = .01, '***' = .001),
               output = "latex")  %>% 
  kable_styling(latex_options = c("hold_position",
                                  "scale_down",
                                   notation="none"),
                font_size = 7,
                 full_width = FALSE) %>% 
  footnote(glue("Sharing discernment is calculated as the respondent level difference between mean real and false headline sharing, that is, sharing real - sharing false. 'All headlines (Treatment X veracity)' shows a regression where all headlines are included with the interaction of the treatment interventions and the veracity of the headline on sharing intention. Coefficients are OLS estimates. Standard errors (in parentheses) are clustered at the respondent level. All regressions are based on observations from {nclus} respondents. * p < 0.05, ** p < 0.01, *** p < 0.001"),
           threeparttable = TRUE) %>% 
  save_kable(file = here::here(  "fig1_regoutput_attention.tex"))
  

```

##### Regression among social media users
These regressions are filtered on social media user

```{r}
load(here::here( "final_data.Rdata")) # Load wave 


df <- df %>%
  filter(facebook_user == 1)


# I also want to add an interaction term, which I do below. 
# Define vars
dvs <- c("sharing_fake", "sharing_real")
ivs <-  c("treatment")

# Specificy formulas
fs <- crossing(dvs, ivs)  %>%
  mutate(f = glue("{dvs} ~ {ivs}")) %>% 
  # Add row with interaction term
  add_row(f = glue("sharing_all ~ {ivs}*real"))

# Run regression for all models by applying lm function on formulas
m0 <- fs %>%
  mutate(model = map(f, ~ lm_robust(
    formula = as.formula(.x),
    data = df,
    clusters = resid
  ))) 

nclus <- m0[["model"]][[1]][["nclusters"]]

# calculate mean sharing discernment and collapse by respondent
df_disc <- df %>%
  group_by(resid, treatment) %>%
  summarise(sharing_disc = mean(sharing_real, na.rm = T)-mean(sharing_fake, na.rm = T))


# Define vars
dvs <- c("sharing_disc")
ivs <-  c("treatment")

# Specificy formulas
fs <- crossing(dvs, ivs)  %>%
  mutate(f = glue("{dvs} ~ {ivs}"))

# Apply lm function on formulas, create tidy dataframe 3) filter relevant coefficients
m0_disc <- fs %>%
  mutate(model = map(f, ~ lm_robust(
    formula = as.formula(.x),
    data = df_disc,
    clusters = resid
  ))) 

# combine models
m0 <- rbind(m0, m0_disc)

# Define the three models
models <- list(
  "False headline sharing" = m0$model[[1]],
  "Real headline sharing" = m0$model[[2]],
  "Sharing discernment" = m0$model[[4]],
  "All headlines (Intervention X veracity)" = m0$model[[3]]
)

# This edits the labels (not necessary)
cm <- c(
  "treatmentNudge" = "Accuracy nudge (vs. control)",
  "treatmentShort" = "15 sec video (vs. control)",
  "treatmentLong" = "3 min video (vs. control)",
  "realreal" = "Headline veracity (0 = false, 1 = true)",
  "treatmentNudge:realreal" = "Accuracy nudge X veracity",
  "treatmentShort:realreal" = "15 sec video X veracity",
  "treatmentLong:realreal" = "3 min video X veracity",
  '(Intercept)' = 'Constant'
)


# Define what should be shown
gm <- tibble::tribble(
  ~raw,        ~clean,          ~fmt,
  "nobs",      "N",             0,
  "r.squared", "R2", 2,
  "adj.r.squared", "R2 Adj.", 2,
  "vcov.type", "SE clusters", 0)

# Create table
modelsummary(
  models,
  title = "Willingness to share headlines among Facebook users. \\label{tab:fig1_regout_facebook}",
  coef_map = cm,
  gof_map = gm,
  estimate  = "{estimate} ({std.error}){stars}",
  statistic = NULL,
  stars = c('*' = .05, '**' = .01, '***' = .001),
               output = "latex")  %>% 
  kable_styling(latex_options = c("hold_position",
                                  "scale_down",
                                   notation="none"),
                font_size = 7,
                 full_width = FALSE) %>% 
  footnote(glue("Sharing discernment is calculated as the respondent level difference between mean real and false headline sharing, that is, sharing real - sharing false. 'All headlines (Treatment X veracity)' shows a regression where all headlines are included with the interaction of the treatment interventions and the veracity of the headline on sharing intention. Coefficients are OLS estimates. Standard errors (in parentheses) are clustered at the respondent level. All regressions are based on observations from {nclus} respondents. * p < 0.05, ** p < 0.01, *** p < 0.001"),
           threeparttable = TRUE) %>% 
  save_kable(file = here::here(  "fig1_regoutput_facebook.tex"))
  

```


